From 209954cbc7d0ce1a190fc725d20ce303d74d2680 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 14 Nov 2024 10:26:16 -0500
Subject: [PATCH 001/368] x86/mm/tlb: Update mm_cpumask lazily

On busy multi-threaded workloads, there can be significant contention
on the mm_cpumask at context switch time.

Reduce that contention by updating mm_cpumask lazily, setting the CPU bit
at context switch time (if not already set), and clearing the CPU bit at
the first TLB flush sent to a CPU where the process isn't running.

When a flurry of TLB flushes for a process happens, only the first one
will be sent to CPUs where the process isn't running. The others will
be sent to CPUs where the process is currently running.

On an AMD Milan system with 36 cores, there is a noticeable difference:
$ hackbench --groups 20 --loops 10000

  Before: ~4.5s +/- 0.1s
  After:  ~4.2s +/- 0.1s

Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Link: https://lore.kernel.org/r/20241114152723.1294686-2-riel@surriel.com
---
 arch/x86/kernel/alternative.c | 10 +++++++---
 arch/x86/mm/tlb.c             | 19 +++++++++----------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d17518ca19b8b..8b66a555d2f03 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1825,11 +1825,18 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
 	return temp_state;
 }
 
+__ro_after_init struct mm_struct *poking_mm;
+__ro_after_init unsigned long poking_addr;
+
 static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
 {
 	lockdep_assert_irqs_disabled();
+
 	switch_mm_irqs_off(NULL, prev_state.mm, current);
 
+	/* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
+	cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm));
+
 	/*
 	 * Restore the breakpoints if they were disabled before the temporary mm
 	 * was loaded.
@@ -1838,9 +1845,6 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
 		hw_breakpoint_restore();
 }
 
-__ro_after_init struct mm_struct *poking_mm;
-__ro_after_init unsigned long poking_addr;
-
 static void text_poke_memcpy(void *dst, const void *src, size_t len)
 {
 	memcpy(dst, src, len);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b0d5a644fc84d..cc4e57ae690f5 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -606,18 +606,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 		cond_mitigation(tsk);
 
 		/*
-		 * Stop remote flushes for the previous mm.
-		 * Skip kernel threads; we never send init_mm TLB flushing IPIs,
-		 * but the bitmap manipulation can cause cache line contention.
+		 * Leave this CPU in prev's mm_cpumask. Atomic writes to
+		 * mm_cpumask can be expensive under contention. The CPU
+		 * will be removed lazily at TLB flush time.
 		 */
-		if (prev != &init_mm) {
-			VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
-						mm_cpumask(prev)));
-			cpumask_clear_cpu(cpu, mm_cpumask(prev));
-		}
+		VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu,
+				mm_cpumask(prev)));
 
 		/* Start receiving IPIs and then read tlb_gen (and LAM below) */
-		if (next != &init_mm)
+		if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))
 			cpumask_set_cpu(cpu, mm_cpumask(next));
 		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
 
@@ -761,8 +758,10 @@ static void flush_tlb_func(void *info)
 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 
 		/* Can only happen on remote CPUs */
-		if (f->mm && f->mm != loaded_mm)
+		if (f->mm && f->mm != loaded_mm) {
+			cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm));
 			return;
+		}
 	}
 
 	if (unlikely(loaded_mm == &init_mm))

From 2815a56e4b7252a836969f5674ee356ea1ce482c Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 14 Nov 2024 10:26:17 -0500
Subject: [PATCH 002/368] x86/mm/tlb: Add tracepoint for TLB flush IPI to stale
 CPU

Add a tracepoint that fires when a TLB flush IPI reaches a CPU that is
still in the mm_cpumask, but is no longer running that mm.

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20241114152723.1294686-3-riel@surriel.com
---
 arch/x86/mm/tlb.c        | 1 +
 include/linux/mm_types.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index cc4e57ae690f5..1aac4fa90d3d8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -760,6 +760,7 @@ static void flush_tlb_func(void *info)
 		/* Can only happen on remote CPUs */
 		if (f->mm && f->mm != loaded_mm) {
 			cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm));
+			trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0);
 			return;
 		}
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bca..6b6f05404304c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1335,6 +1335,7 @@ enum tlb_flush_reason {
 	TLB_LOCAL_SHOOTDOWN,
 	TLB_LOCAL_MM_SHOOTDOWN,
 	TLB_REMOTE_SEND_IPI,
+	TLB_REMOTE_WRONG_CPU,
 	NR_TLB_FLUSH_REASONS,
 };
 

From e8a99af68c068865dbac7f3330e97bf8e96edf33 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 3 Dec 2024 15:44:17 +0800
Subject: [PATCH 003/368] tools/power turbostat: Add initial support for
 PantherLake

Add initial support for PantherLake.
It shares the same features with Lunarlake.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 58a487c225a73..540336138ce9f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1024,6 +1024,7 @@ static const struct platform_data turbostat_pdata[] = {
 	{ INTEL_ARROWLAKE_U, &adl_features },
 	{ INTEL_ARROWLAKE, &adl_features },
 	{ INTEL_LUNARLAKE_M, &lnl_features },
+	{ INTEL_PANTHERLAKE_L, &lnl_features },
 	{ INTEL_ATOM_SILVERMONT, &slv_features },
 	{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
 	{ INTEL_ATOM_AIRMONT, &amt_features },

From 6b47ed23e2f1bc2c177da47437970e6208ac9ea0 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 3 Dec 2024 15:44:18 +0800
Subject: [PATCH 004/368] tools/power turbostat: Add initial support for
 ClearwaterForest

Add initial support for ClearwaterForest.
It shares the same features with SierraForest.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 540336138ce9f..e203f109dd2e2 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1037,6 +1037,7 @@ static const struct platform_data turbostat_pdata[] = {
 	{ INTEL_ATOM_GRACEMONT, &adl_features },
 	{ INTEL_ATOM_CRESTMONT_X, &srf_features },
 	{ INTEL_ATOM_CRESTMONT, &grr_features },
+	{ INTEL_ATOM_DARKMONT_X, &srf_features },
 	{ INTEL_XEON_PHI_KNL, &knl_features },
 	{ INTEL_XEON_PHI_KNM, &knl_features },
 	/*

From 9e47f8adb053b69e2e8310551e6fd5156704cef4 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 3 Dec 2024 12:23:22 -0500
Subject: [PATCH 005/368] tools/power turbostat: update turbostat(8)

Clarify how to get the latest version.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 28 ++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index a7f7ed01421c1..59b89e6b25bf0 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -516,14 +516,40 @@ that they count at TSC rate, which is true on all processors tested to date.
 Volume 3B: System Programming Guide"
 https://www.intel.com/products/processor/manuals/
 
+.SH RUN THE LATEST VERSION
+If turbostat complains that it doesn't recognize your processor,
+please try the latest version.
+
+The latest version of turbostat does not require the latest version of the Linux kernel.
+However, some features, such as perf(1) counters, do require kernel support.
+
+The latest turbostat release is available in the upstream Linux Kernel source tree.
+eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"
+and run make in tools/power/x86/turbostat/.
+
+n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version.
+For this reason, manually installing to /usr/local/bin may be what you want.
+
+Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file
+that can build without a local kernel source tree.
+
+If the upstream version isn't new enough, the development tree can be found here:
+"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat"
+
+If the development tree doesn't work, please contact the author via chat,
+or via email with the word "turbostat" on the Subject line.
+
 .SH FILES
 .ta
 .nf
+/sys/bus/event_source/devices/
 /dev/cpu/*/msr
+/sys/class/intel_pmt/
+/sys/devices/system/cpu/
 .fi
 
 .SH "SEE ALSO"
-msr(4), vmstat(8)
+perf(1), msr(4), vmstat(8)
 .PP
 .SH AUTHOR
 .nf

From 4133be39e216130a86382fb5cfbaf6851a6f7a45 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 3 Dec 2024 15:51:16 +0800
Subject: [PATCH 006/368] tools/power turbostat: Exit on unsupported Intel
 models

Turbostat requires per-platform enabling for Intel CPU models due to
platform-specific features. When running on unsupported Intel CPU
models, turbostat currently operates with limited default features,
which can lead to users unknowingly using an outdated version of the
tool.

Enhance turbostat to exit by default when run on unsupported Intel CPU
models, with a clear message to users, informing them that their CPU
model is not supported and advising them to update to the latest version
of turbostat for full functionality.

[lenb: updated error message wording]

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index e203f109dd2e2..5e894b71003c9 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1079,6 +1079,10 @@ void probe_platform_features(unsigned int family, unsigned int model)
 			return;
 		}
 	}
+
+	fprintf(stderr, "Unsupported platform detected.\n"
+		"\tSee RUN THE LATEST VERSION on turbostat(8)\n");
+	exit(1);
 }
 
 /* Model specific support End */

From 48c62ba1b407140229e92f5cfae6ae113fc4af8e Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 3 Dec 2024 15:51:17 +0800
Subject: [PATCH 007/368] tools/power turbostat: Exit on unsupported Vendors

Turbostat currently supports x86 processors from Intel, AMD, and Hygon.
The behavior of turbostat on CPUs from other vendors has not been
evaluated and may lead to incorrect or undefined behavior.

Enhance turbostat to exit by default when running on an unsupported CPU
vendor. This ensures that users are aware that their CPU is not
currently supported by turbostat, guiding them to seek support for their
specific hardware through future patches.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 5e894b71003c9..cb659b274554b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1056,9 +1056,9 @@ void probe_platform_features(unsigned int family, unsigned int model)
 {
 	int i;
 
-	platform = &default_features;
 
 	if (authentic_amd || hygon_genuine) {
+		platform = &default_features;
 		if (max_extended_level >= 0x80000007) {
 			unsigned int eax, ebx, ecx, edx;
 
@@ -1071,7 +1071,7 @@ void probe_platform_features(unsigned int family, unsigned int model)
 	}
 
 	if (!genuine_intel)
-		return;
+		goto end;
 
 	for (i = 0; turbostat_pdata[i].features; i++) {
 		if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
@@ -1080,6 +1080,10 @@ void probe_platform_features(unsigned int family, unsigned int model)
 		}
 	}
 
+end:
+	if (platform)
+		return;
+
 	fprintf(stderr, "Unsupported platform detected.\n"
 		"\tSee RUN THE LATEST VERSION on turbostat(8)\n");
 	exit(1);

From cc63f89ef9db70f74c563317d36028bb5e6196a1 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 3 Dec 2024 15:51:18 +0800
Subject: [PATCH 008/368] tools/power turbostat: Improve --help output

Improve the `--help` output of turbostat by standardizing the format
and enhancing readability. The following changes are made to ensure
consistency and clarity in the help message:
1. Use a consistent pattern for each parameter's help message:
   - Display the parameter and its input (if any) on the same line,
     separated by a space.
   - Provide the detailed description on a separate line.
2. Ensure that the first character of each description is in lower-case.

These changes make the help output more uniform and easier to read,
helping users quickly understand the available options and their usage.

No functional change.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 41 +++++++++++++++++----------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index cb659b274554b..5165450a81877 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2145,41 +2145,52 @@ void help(void)
 		"when COMMAND completes.\n"
 		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
 		"to print statistics, until interrupted.\n"
-		"  -a, --add	add a counter\n"
+		"  -a, --add counter\n"
+		"		add a counter\n"
 		"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
 		"		  eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
 		"		  eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
-		"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
+		"  -c, --cpu cpu-set\n"
+		"		limit output to summary plus cpu-set:\n"
 		"		  {core | package | j,k,l..m,n-p }\n"
-		"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
+		"  -d, --debug\n"
+		"		displays usec, Time_Of_Day_Seconds and more debugging\n"
 		"		debug messages are printed to stderr\n"
-		"  -D, --Dump	displays the raw counter values\n"
-		"  -e, --enable	[all | column]\n"
+		"  -D, --Dump\n"
+		"		displays the raw counter values\n"
+		"  -e, --enable [all | column]\n"
 		"		shows all or the specified disabled column\n"
-		"  -H, --hide [column|column,column,...]\n"
+		"  -H, --hide [column | column,column,...]\n"
 		"		hide the specified column(s)\n"
 		"  -i, --interval sec.subsec\n"
-		"		Override default 5-second measurement interval\n"
-		"  -J, --Joules	displays energy in Joules instead of Watts\n"
-		"  -l, --list	list column headers only\n"
-		"  -M, --no-msr Disable all uses of the MSR driver\n"
-		"  -P, --no-perf Disable all uses of the perf API\n"
+		"		override default 5-second measurement interval\n"
+		"  -J, --Joules\n"
+		"		displays energy in Joules instead of Watts\n"
+		"  -l, --list\n"
+		"		list column headers only\n"
+		"  -M, --no-msr\n"
+		"		disable all uses of the MSR driver\n"
+		"  -P, --no-perf\n"
+		"		disable all uses of the perf API\n"
 		"  -n, --num_iterations num\n"
 		"		number of the measurement iterations\n"
 		"  -N, --header_iterations num\n"
 		"		print header every num iterations\n"
 		"  -o, --out file\n"
 		"		create or truncate \"file\" for all output\n"
-		"  -q, --quiet	skip decoding system configuration header\n"
-		"  -s, --show [column|column,column,...]\n"
+		"  -q, --quiet\n"
+		"		skip decoding system configuration header\n"
+		"  -s, --show [column | column,column,...]\n"
 		"		show only the specified column(s)\n"
 		"  -S, --Summary\n"
 		"		limits output to 1-line system summary per interval\n"
 		"  -T, --TCC temperature\n"
 		"		sets the Thermal Control Circuit temperature in\n"
 		"		  degrees Celsius\n"
-		"  -h, --help	print this help message\n"
-		"  -v, --version	print version information\n" "\n" "For more help, run \"man turbostat\"\n");
+		"  -h, --help\n"
+		"		print this help message\n"
+		"  -v, --version\n"
+		"		print version information\n" "\n" "For more help, run \"man turbostat\"\n");
 }
 
 /*

From 3d94026af328d3d355d15c1d7fe73278f77c6a42 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 3 Dec 2024 15:51:19 +0800
Subject: [PATCH 009/368] tools/power turbostat: Introduce --force parameter

Turbostat currently exits under the following conditions:
1. When running on non-Intel/AMD/Hygon x86 vendors.
2. When running on Intel models that lack specific platform features.

Introduce a new `--force` parameter that allows turbostat to run on
these unsupported platforms with minimal default feature support. This
provides users with the flexibility to gather basic information even on
unsupported systems.

[lenb: updated warning message text]

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 5165450a81877..7accc4a733667 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -326,6 +326,7 @@ unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int list_header_only;
 unsigned int dump_only;
+unsigned int force_load;
 unsigned int has_aperf;
 unsigned int has_aperf_access;
 unsigned int has_epb;
@@ -1058,7 +1059,8 @@ void probe_platform_features(unsigned int family, unsigned int model)
 
 
 	if (authentic_amd || hygon_genuine) {
-		platform = &default_features;
+		/* fallback to default features on unsupported models */
+		force_load++;
 		if (max_extended_level >= 0x80000007) {
 			unsigned int eax, ebx, ecx, edx;
 
@@ -1067,7 +1069,7 @@ void probe_platform_features(unsigned int family, unsigned int model)
 			if ((edx & (1 << 14)) && family >= 0x17)
 				platform = &amd_features_with_rapl;
 		}
-		return;
+		goto end;
 	}
 
 	if (!genuine_intel)
@@ -1081,6 +1083,11 @@ void probe_platform_features(unsigned int family, unsigned int model)
 	}
 
 end:
+	if (force_load && !platform) {
+		fprintf(outf, "Forced to run on unsupported platform!\n");
+		platform = &default_features;
+	}
+
 	if (platform)
 		return;
 
@@ -2160,6 +2167,8 @@ void help(void)
 		"		displays the raw counter values\n"
 		"  -e, --enable [all | column]\n"
 		"		shows all or the specified disabled column\n"
+		"  -f, --force\n"
+		"		force load turbostat with minimum default features on unsupported platforms.\n"
 		"  -H, --hide [column | column,column,...]\n"
 		"		hide the specified column(s)\n"
 		"  -i, --interval sec.subsec\n"
@@ -9942,6 +9951,7 @@ void cmdline(int argc, char **argv)
 		{ "Dump", no_argument, 0, 'D' },
 		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
 		{ "enable", required_argument, 0, 'e' },
+		{ "force", no_argument, 0, 'f' },
 		{ "interval", required_argument, 0, 'i' },
 		{ "IPC", no_argument, 0, 'I' },
 		{ "num_iterations", required_argument, 0, 'n' },
@@ -10002,6 +10012,9 @@ void cmdline(int argc, char **argv)
 			/* --enable specified counter */
 			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
 			break;
+		case 'f':
+			force_load++;
+			break;
 		case 'd':
 			debug++;
 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);

From 953753db887f9d70f70f61d6ecbe5cf209107672 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 5 Dec 2024 10:46:30 -0500
Subject: [PATCH 010/368] x86/mm/tlb: Also remove local CPU from mm_cpumask if
 stale

The code in flush_tlb_func() that removes a remote CPU from the
cpumask if it is no longer running the target mm is also needed
on the originating CPU of a TLB flush, now that CPUs are no
longer cleared from the mm_cpumask at context switch time.

Flushing the TLB when we are not running the target mm is
harmless, because the CPU's tlb_gen only gets updated to
match the mm_tlb_gen, but it does hit this warning:

        WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);

  [ 210.343902][ T4668] WARNING: CPU: 38 PID: 4668 at arch/x86/mm/tlb.c:815 flush_tlb_func (arch/x86/mm/tlb.c:815)

Removing both local and remote CPUs from the mm_cpumask
when doing a flush for a not currently loaded mm avoids
that warning.

Reported-by: kernel test robot <oliver.sang@intel.com>
Tested-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20241205104630.755706ca@fangorn
Closes: https://lore.kernel.org/oe-lkp/202412051551.690e9656-lkp@intel.com
---
 arch/x86/mm/tlb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1aac4fa90d3d8..3c30817ec6a21 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -756,13 +756,13 @@ static void flush_tlb_func(void *info)
 	if (!local) {
 		inc_irq_stat(irq_tlb_count);
 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+	}
 
-		/* Can only happen on remote CPUs */
-		if (f->mm && f->mm != loaded_mm) {
-			cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm));
-			trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0);
-			return;
-		}
+	/* The CPU was left in the mm_cpumask of the target mm. Clear it. */
+	if (f->mm && f->mm != loaded_mm) {
+		cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm));
+		trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0);
+		return;
 	}
 
 	if (unlikely(loaded_mm == &init_mm))

From 6db2526c1d694c91c6e05e2f186c085e9460f202 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@fb.com>
Date: Wed, 4 Dec 2024 21:03:16 -0500
Subject: [PATCH 011/368] x86/mm/tlb: Only trim the mm_cpumask once a second

Setting and clearing CPU bits in the mm_cpumask is only ever done
by the CPU itself, from the context switch code or the TLB flush
code.

Synchronization is handled by switch_mm_irqs_off() blocking interrupts.

Sending TLB flush IPIs to CPUs that are in the mm_cpumask, but no
longer running the program, causes a regression in the will-it-scale
tlbflush2 test. This test is contrived, but a large regression here
might cause a small regression in some real world workload.

Instead of always sending IPIs to CPUs that are in the mm_cpumask,
but no longer running the program, send these IPIs only once a second.

The rest of the time we can skip over CPUs where the loaded_mm is
different from the target mm.

Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20241204210316.612ee573@fangorn
Closes: https://lore.kernel.org/oe-lkp/202411282207.6bd28eae-lkp@intel.com/
---
 arch/x86/include/asm/mmu.h         |  2 ++
 arch/x86/include/asm/mmu_context.h |  1 +
 arch/x86/include/asm/tlbflush.h    |  1 +
 arch/x86/mm/tlb.c                  | 35 +++++++++++++++++++++++++++---
 4 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index ce4677b8b7356..3b496cdcb74b3 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -37,6 +37,8 @@ typedef struct {
 	 */
 	atomic64_t tlb_gen;
 
+	unsigned long next_trim_cpumask;
+
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 	struct rw_semaphore	ldt_usr_sem;
 	struct ldt_struct	*ldt;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 2886cb668d7fa..795fdd53bd0a6 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -151,6 +151,7 @@ static inline int init_new_context(struct task_struct *tsk,
 
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	atomic64_set(&mm->context.tlb_gen, 0);
+	mm->context.next_trim_cpumask = jiffies + HZ;
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 69e79fff41b80..02fc2aa06e9e0 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -222,6 +222,7 @@ struct flush_tlb_info {
 	unsigned int		initiating_cpu;
 	u8			stride_shift;
 	u8			freed_tables;
+	u8			trim_cpumask;
 };
 
 void flush_tlb_local(void);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3c30817ec6a21..458a5d5be5943 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -892,9 +892,36 @@ static void flush_tlb_func(void *info)
 			nr_invalidate);
 }
 
-static bool tlb_is_not_lazy(int cpu, void *data)
+static bool should_flush_tlb(int cpu, void *data)
 {
-	return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
+	struct flush_tlb_info *info = data;
+
+	/* Lazy TLB will get flushed at the next context switch. */
+	if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+		return false;
+
+	/* No mm means kernel memory flush. */
+	if (!info->mm)
+		return true;
+
+	/* The target mm is loaded, and the CPU is not lazy. */
+	if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm)
+		return true;
+
+	/* In cpumask, but not the loaded mm? Periodically remove by flushing. */
+	if (info->trim_cpumask)
+		return true;
+
+	return false;
+}
+
+static bool should_trim_cpumask(struct mm_struct *mm)
+{
+	if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) {
+		WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ);
+		return true;
+	}
+	return false;
 }
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
@@ -928,7 +955,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
 	if (info->freed_tables)
 		on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
 	else
-		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
+		on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func,
 				(void *)info, 1, cpumask);
 }
 
@@ -979,6 +1006,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
 	info->freed_tables	= freed_tables;
 	info->new_tlb_gen	= new_tlb_gen;
 	info->initiating_cpu	= smp_processor_id();
+	info->trim_cpumask	= 0;
 
 	return info;
 }
@@ -1021,6 +1049,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	 * flush_tlb_func_local() directly in this case.
 	 */
 	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+		info->trim_cpumask = should_trim_cpumask(mm);
 		flush_tlb_multi(mm_cpumask(mm), info);
 	} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
 		lockdep_assert_irqs_enabled();

From 9d93db0d1881c9e37e1528cd796e20ff13b7692c Mon Sep 17 00:00:00 2001
From: Gautam Somani <gautamsomani@gmail.com>
Date: Sun, 1 Dec 2024 03:41:02 +0900
Subject: [PATCH 012/368] x86/mm/selftests: Fix typo in lam.c

Change the spelling from metadate -> metadata

Signed-off-by: Gautam Somani <gautamsomani@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20241130184102.2182-1-gautamsomani@gmail.com
---
 tools/testing/selftests/x86/lam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 0ea4f6813930b..4d4a76532dc9a 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -237,7 +237,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam)
  * both pointers should point to the same address.
  *
  * @return:
- * 0: value on the pointer with metadate and value on original are same
+ * 0: value on the pointer with metadata and value on original are same
  * 1: not same.
  */
 static int handle_lam_test(void *src, unsigned int lam)

From dd4059634dab548c904eeae2660ba3c8f7ce843c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Mon, 2 Dec 2024 09:31:39 +0200
Subject: [PATCH 013/368] x86/mtrr: Rename mtrr_overwrite_state() to
 guest_force_mtrr_state()

Rename the helper to better reflect its function.

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20241202073139.448208-1-kirill.shutemov@linux.intel.com
---
 arch/x86/hyperv/ivm.c              |  2 +-
 arch/x86/include/asm/mtrr.h        | 10 +++++-----
 arch/x86/kernel/cpu/mtrr/generic.c |  6 +++---
 arch/x86/kernel/cpu/mtrr/mtrr.c    |  2 +-
 arch/x86/kernel/kvm.c              |  2 +-
 arch/x86/xen/enlighten_pv.c        |  4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 60fc3ed728304..90aabe1fd3b66 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -664,7 +664,7 @@ void __init hv_vtom_init(void)
 	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
 
 	/* Set WB as the default cache mode. */
-	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
 #endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 4218248083d98..c69e269937c56 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -58,8 +58,8 @@ struct mtrr_state_type {
  */
 # ifdef CONFIG_MTRR
 void mtrr_bp_init(void);
-void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
-			  mtrr_type def_type);
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+			    mtrr_type def_type);
 extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
 extern void mtrr_save_fixed_ranges(void *);
 extern void mtrr_save_state(void);
@@ -75,9 +75,9 @@ void mtrr_disable(void);
 void mtrr_enable(void);
 void mtrr_generic_set_state(void);
 #  else
-static inline void mtrr_overwrite_state(struct mtrr_var_range *var,
-					unsigned int num_var,
-					mtrr_type def_type)
+static inline void guest_force_mtrr_state(struct mtrr_var_range *var,
+					  unsigned int num_var,
+					  mtrr_type def_type)
 {
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7b29ebda024f4..2fdfda2b60e4f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -423,7 +423,7 @@ void __init mtrr_copy_map(void)
 }
 
 /**
- * mtrr_overwrite_state - set static MTRR state
+ * guest_force_mtrr_state - set static MTRR state for a guest
  *
  * Used to set MTRR state via different means (e.g. with data obtained from
  * a hypervisor).
@@ -436,8 +436,8 @@ void __init mtrr_copy_map(void)
  * @num_var: length of the @var array
  * @def_type: default caching type
  */
-void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
-			  mtrr_type def_type)
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+			    mtrr_type def_type)
 {
 	unsigned int i;
 
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 989d368be04fc..ecbda0341a8a3 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -625,7 +625,7 @@ void mtrr_save_state(void)
 static int __init mtrr_init_finalize(void)
 {
 	/*
-	 * Map might exist if mtrr_overwrite_state() has been called or if
+	 * Map might exist if guest_force_mtrr_state() has been called or if
 	 * mtrr_enabled() returns true.
 	 */
 	mtrr_copy_map();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 21e9e48453541..7a422a6c5983c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -983,7 +983,7 @@ static void __init kvm_init_platform(void)
 	x86_platform.apic_post_init = kvm_apic_init;
 
 	/* Set WB as the default cache mode for SEV-SNP and TDX */
-	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
 #if defined(CONFIG_AMD_MEM_ENCRYPT)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d6818c6cafda1..633469fab5362 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -171,7 +171,7 @@ static void __init xen_set_mtrr_data(void)
 
 	/* Only overwrite MTRR state if any MTRR could be got from Xen. */
 	if (reg)
-		mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
+		guest_force_mtrr_state(var, reg, MTRR_TYPE_UNCACHABLE);
 #endif
 }
 
@@ -195,7 +195,7 @@ static void __init xen_pv_init_platform(void)
 	if (xen_initial_domain())
 		xen_set_mtrr_data();
 	else
-		mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+		guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
 
 	/* Adjust nr_cpu_ids before "enumeration" happens */
 	xen_smp_count_cpus();

From 0207244ea0e7fcf45e68e24b0fffe964624a22ef Mon Sep 17 00:00:00 2001
From: Drew Fustini <drew@pdp7.com>
Date: Wed, 13 Nov 2024 10:43:36 -0800
Subject: [PATCH 014/368] riscv: defconfig: enable pinctrl and dwmac support
 for TH1520

Enable pinctrl and ethernet dwmac driver for the TH1520 SoC boards like
the BeagleV Ahead and the Sipeed LicheePi 4A.

Signed-off-by: Drew Fustini <drew@pdp7.com>
Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
Link: https://lore.kernel.org/r/20241113184333.829716-1-drew@pdp7.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index b4a37345703eb..d26e670404b6b 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -167,6 +167,7 @@ CONFIG_PINCTRL_SOPHGO_CV1800B=y
 CONFIG_PINCTRL_SOPHGO_CV1812H=y
 CONFIG_PINCTRL_SOPHGO_SG2000=y
 CONFIG_PINCTRL_SOPHGO_SG2002=y
+CONFIG_PINCTRL_TH1520=y
 CONFIG_GPIO_DWAPB=y
 CONFIG_GPIO_SIFIVE=y
 CONFIG_POWER_RESET_GPIO_RESTART=y
@@ -242,6 +243,7 @@ CONFIG_RTC_DRV_SUN6I=y
 CONFIG_DMADEVICES=y
 CONFIG_DMA_SUN6I=m
 CONFIG_DW_AXI_DMAC=y
+CONFIG_DWMAC_THEAD=m
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_INPUT=y

From 9d0593da9459176396c1f2246efafbc80a828c7f Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 13 Nov 2024 10:33:21 -0800
Subject: [PATCH 015/368] riscv/futex: Optimize atomic cmpxchg

Remove redundant release/acquire barriers, optimizing the lr/sc sequence
to provide conditional RCsc synchronization, per the RVWMO.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241113183321.491113-1-dave@stgolabs.net
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/futex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index fc8130f995c1e..72be100afa236 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -85,7 +85,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 
 	__enable_user_access();
 	__asm__ __volatile__ (
-	"1:	lr.w.aqrl %[v],%[u]			\n"
+	"1:	lr.w %[v],%[u]			        \n"
 	"	bne %[v],%z[ov],3f			\n"
 	"2:	sc.w.aqrl %[t],%z[nv],%[u]		\n"
 	"	bnez %[t],1b				\n"

From aa135d1d0902c49ed45bec98c61c1b4022652b7e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Dec 2024 09:40:29 +0100
Subject: [PATCH 016/368] x86/mm: Remove unnecessary include of
 <linux/extable.h>

The header file linux/extable.h is included for
search_exception_tables(). That function is no longer used since commit:

  c2508ec5a58db ("mm: introduce new 'lock_mm_and_find_vma()' page fault helper")

Remove it.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20241220084029.473617-1-bigeasy@linutronix.de
---
 arch/x86/mm/fault.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e6c469b323ccb..ef12ff3db9039 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -7,7 +7,6 @@
 #include <linux/sched.h>		/* test_thread_flag(), ...	*/
 #include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
-#include <linux/extable.h>		/* search_exception_tables	*/
 #include <linux/memblock.h>		/* max_low_pfn			*/
 #include <linux/kfence.h>		/* kfence_handle_page_fault	*/
 #include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/

From 26f2d6de41795a931d1c16950114dbcf55dfbd75 Mon Sep 17 00:00:00 2001
From: Celeste Liu <coelacanthushex@gmail.com>
Date: Tue, 10 Sep 2024 20:51:07 +0800
Subject: [PATCH 017/368] riscv: defconfig: drop RT_GROUP_SCHED=y

Commit ba6cfef057e1 ("riscv: enable Docker requirements in defconfig")
introduced it because of Docker, but Docker has removed this requirement
since [1] (2023-04-19).

For cgroup v1, if turned on, and there's any cgroup in the "cpu" hierarchy it
needs an RT budget assigned, otherwise the processes in it will not be able to
get RT at all. The problem with RT group scheduling is that it requires the
budget assigned but there's no way we could assign a default budget, since the
values to assign are both upper and lower time limits, are absolute, and need to
sum up to < 1 for each individual cgroup. That means we cannot really come up
with values that would work by default in the general case.[2]

For cgroup v2, it's almost unusable as well. If it is turned on, the cpu
controller can only be enabled when all RT processes are in the root cgroup. But
it will lose the benefits of cgroup v2 if all RT processes are placed in the
same cgroup.

Red Hat, Gentoo, Arch Linux and Debian all disable it. systemd also doesn't
support it.[3]

[1]: https://github.com/moby/moby/commit/005150ed69c540fb0b5323e0f2208608c1204536
[2]: https://bugzilla.redhat.com/show_bug.cgi?id=1229700
[3]: https://github.com/systemd/systemd/issues/13781#issuecomment-549164383

Acked-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
Signed-off-by: Celeste Liu <CoelacanthusHex@gmail.com>
Acked-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20240910-fix-riscv-rt_group_sched-v3-1-486e75e5ae6d@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/configs/defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index d26e670404b6b..3049869a5ac07 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -10,7 +10,6 @@ CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y

From f7c848431632598ff9bce57a659db6af60d75b39 Mon Sep 17 00:00:00 2001
From: Mike Marshall <hubcap@omnibond.com>
Date: Wed, 8 Jan 2025 14:21:08 -0500
Subject: [PATCH 018/368] orangefs: fix a oob in orangefs_debug_write

I got a syzbot report: slab-out-of-bounds Read in
orangefs_debug_write... several people suggested fixes,
I tested Al Viro's suggestion and made this patch.

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
Reported-by: syzbot+fc519d7875f2d9186c1f@syzkaller.appspotmail.com
---
 fs/orangefs/orangefs-debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 1b508f5433846..fa41db0884880 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -393,9 +393,9 @@ static ssize_t orangefs_debug_write(struct file *file,
 	 * Thwart users who try to jamb a ridiculous number
 	 * of bytes into the debug file...
 	 */
-	if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) {
+	if (count > ORANGEFS_MAX_DEBUG_STRING_LEN) {
 		silly = count;
-		count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1;
+		count = ORANGEFS_MAX_DEBUG_STRING_LEN;
 	}
 
 	buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);

From 7158c61afdcff436d087a093b45f599bb8805434 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 3 Dec 2024 17:25:05 +0100
Subject: [PATCH 019/368] rtc: RTC_DRV_SPEAR should not default to y when
 compile-testing

Merely enabling compile-testing should not enable additional
functionality.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/7b8eefe3b0150101ba01c3ea55e45aa3134059ba.1733243007.git.geert+renesas@glider.be
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index a60bcc791a480..0bbbf778ecfa3 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1316,7 +1316,7 @@ config RTC_DRV_SC27XX
 config RTC_DRV_SPEAR
 	tristate "SPEAR ST RTC"
 	depends on PLAT_SPEAR || COMPILE_TEST
-	default y
+	default PLAT_SPEAR
 	help
 	 If you say Y here you will get support for the RTC found on
 	 spear

From 8c28c4993f117e03130a51160229bde7ad388240 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 17 Dec 2024 08:13:26 +0100
Subject: [PATCH 020/368] rtc: use boolean values with device_init_wakeup()

The second argument of device_init_wakeup() is a bool. Use proper boolean
values when calling it to match the type and to produce unambiguous code
which is easier to understand.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20241217071331.3607-2-wsa+renesas@sang-engineering.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-88pm80x.c       | 2 +-
 drivers/rtc/rtc-88pm860x.c      | 2 +-
 drivers/rtc/rtc-amlogic-a4.c    | 6 +++---
 drivers/rtc/rtc-armada38x.c     | 2 +-
 drivers/rtc/rtc-as3722.c        | 2 +-
 drivers/rtc/rtc-at91rm9200.c    | 2 +-
 drivers/rtc/rtc-at91sam9.c      | 2 +-
 drivers/rtc/rtc-cadence.c       | 2 +-
 drivers/rtc/rtc-cmos.c          | 2 +-
 drivers/rtc/rtc-cpcap.c         | 2 +-
 drivers/rtc/rtc-cros-ec.c       | 2 +-
 drivers/rtc/rtc-da9055.c        | 2 +-
 drivers/rtc/rtc-ds3232.c        | 2 +-
 drivers/rtc/rtc-isl1208.c       | 2 +-
 drivers/rtc/rtc-jz4740.c        | 2 +-
 drivers/rtc/rtc-loongson.c      | 4 ++--
 drivers/rtc/rtc-lp8788.c        | 2 +-
 drivers/rtc/rtc-lpc32xx.c       | 2 +-
 drivers/rtc/rtc-max77686.c      | 2 +-
 drivers/rtc/rtc-max8925.c       | 2 +-
 drivers/rtc/rtc-max8997.c       | 2 +-
 drivers/rtc/rtc-meson-vrtc.c    | 2 +-
 drivers/rtc/rtc-mpc5121.c       | 2 +-
 drivers/rtc/rtc-mt6397.c        | 2 +-
 drivers/rtc/rtc-mv.c            | 4 ++--
 drivers/rtc/rtc-mxc.c           | 2 +-
 drivers/rtc/rtc-mxc_v2.c        | 2 +-
 drivers/rtc/rtc-omap.c          | 2 +-
 drivers/rtc/rtc-palmas.c        | 2 +-
 drivers/rtc/rtc-pic32.c         | 2 +-
 drivers/rtc/rtc-pm8xxx.c        | 2 +-
 drivers/rtc/rtc-pxa.c           | 2 +-
 drivers/rtc/rtc-rc5t583.c       | 2 +-
 drivers/rtc/rtc-rc5t619.c       | 2 +-
 drivers/rtc/rtc-renesas-rtca3.c | 2 +-
 drivers/rtc/rtc-rk808.c         | 2 +-
 drivers/rtc/rtc-s3c.c           | 2 +-
 drivers/rtc/rtc-s5m.c           | 2 +-
 drivers/rtc/rtc-sa1100.c        | 2 +-
 drivers/rtc/rtc-sc27xx.c        | 4 ++--
 drivers/rtc/rtc-sh.c            | 2 +-
 drivers/rtc/rtc-spear.c         | 4 ++--
 drivers/rtc/rtc-sun6i.c         | 2 +-
 drivers/rtc/rtc-sunplus.c       | 4 ++--
 drivers/rtc/rtc-tegra.c         | 2 +-
 drivers/rtc/rtc-test.c          | 2 +-
 drivers/rtc/rtc-tps6586x.c      | 2 +-
 drivers/rtc/rtc-tps65910.c      | 2 +-
 drivers/rtc/rtc-twl.c           | 2 +-
 drivers/rtc/rtc-wm831x.c        | 2 +-
 drivers/rtc/rtc-wm8350.c        | 2 +-
 drivers/rtc/rtc-xgene.c         | 4 ++--
 drivers/rtc/rtc-zynqmp.c        | 4 ++--
 53 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c
index 5c39cf252392d..a3e52a5a708ff 100644
--- a/drivers/rtc/rtc-88pm80x.c
+++ b/drivers/rtc/rtc-88pm80x.c
@@ -308,7 +308,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev)
 	/* remember whether this power up is caused by PMIC RTC or not */
 	info->rtc_dev->dev.platform_data = &pdata->rtc_wakeup;
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	return 0;
 out_rtc:
diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c
index 814230d618427..964cd048fcdba 100644
--- a/drivers/rtc/rtc-88pm860x.c
+++ b/drivers/rtc/rtc-88pm860x.c
@@ -326,7 +326,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev)
 	schedule_delayed_work(&info->calib_work, VRTC_CALIB_INTERVAL);
 #endif	/* VRTC_CALIBRATION */
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	return 0;
 }
diff --git a/drivers/rtc/rtc-amlogic-a4.c b/drivers/rtc/rtc-amlogic-a4.c
index 2278b4c98a711..09d78c2cc6918 100644
--- a/drivers/rtc/rtc-amlogic-a4.c
+++ b/drivers/rtc/rtc-amlogic-a4.c
@@ -361,7 +361,7 @@ static int aml_rtc_probe(struct platform_device *pdev)
 				     "failed to get_enable rtc sys clk\n");
 	aml_rtc_init(rtc);
 
-	device_init_wakeup(dev, 1);
+	device_init_wakeup(dev, true);
 	platform_set_drvdata(pdev, rtc);
 
 	rtc->rtc_dev = devm_rtc_allocate_device(dev);
@@ -391,7 +391,7 @@ static int aml_rtc_probe(struct platform_device *pdev)
 	return 0;
 err_clk:
 	clk_disable_unprepare(rtc->sys_clk);
-	device_init_wakeup(dev, 0);
+	device_init_wakeup(dev, false);
 
 	return ret;
 }
@@ -426,7 +426,7 @@ static void aml_rtc_remove(struct platform_device *pdev)
 	struct aml_rtc_data *rtc = dev_get_drvdata(&pdev->dev);
 
 	clk_disable_unprepare(rtc->sys_clk);
-	device_init_wakeup(&pdev->dev, 0);
+	device_init_wakeup(&pdev->dev, false);
 }
 
 static const struct aml_rtc_config a5_rtc_config = {
diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c
index 569c1054d6b0b..713fa0d077cde 100644
--- a/drivers/rtc/rtc-armada38x.c
+++ b/drivers/rtc/rtc-armada38x.c
@@ -527,7 +527,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, rtc);
 
 	if (rtc->irq != -1)
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 	else
 		clear_bit(RTC_FEATURE_ALARM, rtc->rtc_dev->features);
 
diff --git a/drivers/rtc/rtc-as3722.c b/drivers/rtc/rtc-as3722.c
index 0f21af27f4cfe..9682d6457b7fd 100644
--- a/drivers/rtc/rtc-as3722.c
+++ b/drivers/rtc/rtc-as3722.c
@@ -187,7 +187,7 @@ static int as3722_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	as3722_rtc->rtc = devm_rtc_device_register(&pdev->dev, "as3722-rtc",
 				&as3722_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 9b3898b8de7cf..f6b0102a843ad 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -528,7 +528,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
 	 * being wake-capable; if it didn't, do that here.
 	 */
 	if (!device_can_wakeup(&pdev->dev))
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 
 	if (at91_rtc_config->has_correction)
 		rtc->ops = &sama5d4_rtc_ops;
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 15b21da2788f6..38991cca59308 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -353,7 +353,7 @@ static int at91_rtc_probe(struct platform_device *pdev)
 
 	/* platform setup code should have handled this; sigh */
 	if (!device_can_wakeup(&pdev->dev))
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 
 	platform_set_drvdata(pdev, rtc);
 
diff --git a/drivers/rtc/rtc-cadence.c b/drivers/rtc/rtc-cadence.c
index bf2a9a1fdea74..8634eea799ab0 100644
--- a/drivers/rtc/rtc-cadence.c
+++ b/drivers/rtc/rtc-cadence.c
@@ -359,7 +359,7 @@ static void cdns_rtc_remove(struct platform_device *pdev)
 	struct cdns_rtc *crtc = platform_get_drvdata(pdev);
 
 	cdns_rtc_alarm_irq_enable(&pdev->dev, 0);
-	device_init_wakeup(&pdev->dev, 0);
+	device_init_wakeup(&pdev->dev, false);
 
 	clk_disable_unprepare(crtc->pclk);
 	clk_disable_unprepare(crtc->ref_clk);
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 78f2ce12c75a7..4bd3a3a04d444 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -864,7 +864,7 @@ static void acpi_cmos_wake_setup(struct device *dev)
 		dev_info(dev, "RTC can wake from S4\n");
 
 	/* RTC always wakes from S1/S2/S3, and often S4/STD */
-	device_init_wakeup(dev, 1);
+	device_init_wakeup(dev, true);
 }
 
 static void cmos_check_acpi_rtc_status(struct device *dev,
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index afc8fcba8f888..568a89e79c114 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -295,7 +295,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
 	}
 	disable_irq(rtc->update_irq);
 
-	err = device_init_wakeup(dev, 1);
+	err = device_init_wakeup(dev, true);
 	if (err) {
 		dev_err(dev, "wakeup initialization failed (%d)\n", err);
 		/* ignore error and continue without wakeup support */
diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index 60a48c3ba3ca5..865c2e82c7a5b 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -337,7 +337,7 @@ static int cros_ec_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = device_init_wakeup(&pdev->dev, 1);
+	ret = device_init_wakeup(&pdev->dev, true);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to initialize wakeup\n");
 		return ret;
diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c
index 844168fcae1e2..05adec6b77bff 100644
--- a/drivers/rtc/rtc-da9055.c
+++ b/drivers/rtc/rtc-da9055.c
@@ -288,7 +288,7 @@ static int da9055_rtc_probe(struct platform_device *pdev)
 	if (ret & DA9055_RTC_ALM_EN)
 		rtc->alarm_enable = 1;
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
 					&da9055_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index dd37b055693c0..19c09c4187462 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -508,7 +508,7 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq,
 		return ret;
 
 	if (ds3232->irq > 0)
-		device_init_wakeup(dev, 1);
+		device_init_wakeup(dev, true);
 
 	ds3232_hwmon_register(dev, name);
 
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 7b82e4a14b7a2..f71a6bb77b2a1 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -830,7 +830,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq)
 					isl1208_driver.driver.name,
 					client);
 	if (!rc) {
-		device_init_wakeup(&client->dev, 1);
+		device_init_wakeup(&client->dev, true);
 		enable_irq_wake(irq);
 	} else {
 		dev_err(&client->dev,
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index bafa7d1b9b883..44bba356268ca 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -367,7 +367,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, rtc);
 
-	device_init_wakeup(dev, 1);
+	device_init_wakeup(dev, true);
 
 	ret = dev_pm_set_wake_irq(dev, irq);
 	if (ret)
diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c
index 8d713e563d7c0..6f5f4430c2ae3 100644
--- a/drivers/rtc/rtc-loongson.c
+++ b/drivers/rtc/rtc-loongson.c
@@ -329,7 +329,7 @@ static int loongson_rtc_probe(struct platform_device *pdev)
 					     alarm_irq);
 
 		priv->pm_base = regs - priv->config->pm_offset;
-		device_init_wakeup(dev, 1);
+		device_init_wakeup(dev, true);
 
 		if (has_acpi_companion(dev))
 			acpi_install_fixed_event_handler(ACPI_EVENT_RTC,
@@ -360,7 +360,7 @@ static void loongson_rtc_remove(struct platform_device *pdev)
 		acpi_remove_fixed_event_handler(ACPI_EVENT_RTC,
 						loongson_rtc_handler);
 
-	device_init_wakeup(dev, 0);
+	device_init_wakeup(dev, false);
 	loongson_rtc_alarm_irq_enable(dev, 0);
 }
 
diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c
index c0b8fbce10827..0793d70507f7c 100644
--- a/drivers/rtc/rtc-lp8788.c
+++ b/drivers/rtc/rtc-lp8788.c
@@ -293,7 +293,7 @@ static int lp8788_rtc_probe(struct platform_device *pdev)
 	rtc->alarm = lp->pdata ? lp->pdata->alarm_sel : DEFAULT_ALARM_SEL;
 	platform_set_drvdata(pdev, rtc);
 
-	device_init_wakeup(dev, 1);
+	device_init_wakeup(dev, true);
 
 	rtc->rdev = devm_rtc_device_register(dev, "lp8788_rtc",
 					&lp8788_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c
index 76ad7031a13dd..74280bffe1b07 100644
--- a/drivers/rtc/rtc-lpc32xx.c
+++ b/drivers/rtc/rtc-lpc32xx.c
@@ -257,7 +257,7 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev)
 			dev_warn(&pdev->dev, "Can't request interrupt.\n");
 			rtc->irq = -1;
 		} else {
-			device_init_wakeup(&pdev->dev, 1);
+			device_init_wakeup(&pdev->dev, true);
 		}
 	}
 
diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index a8f4b645c09d2..7bb044d2ac25a 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -770,7 +770,7 @@ static int max77686_rtc_probe(struct platform_device *pdev)
 		goto err_rtc;
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	info->rtc_dev = devm_rtc_device_register(&pdev->dev, id->name,
 					&max77686_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c
index 64bb8ac6ef62d..6ce8afbeac680 100644
--- a/drivers/rtc/rtc-max8925.c
+++ b/drivers/rtc/rtc-max8925.c
@@ -270,7 +270,7 @@ static int max8925_rtc_probe(struct platform_device *pdev)
 	/* XXX - isn't this redundant? */
 	platform_set_drvdata(pdev, info);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8925-rtc",
 					&max8925_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c
index 20e50d9fdf882..e7618d715bd89 100644
--- a/drivers/rtc/rtc-max8997.c
+++ b/drivers/rtc/rtc-max8997.c
@@ -473,7 +473,7 @@ static int max8997_rtc_probe(struct platform_device *pdev)
 	max8997_rtc_enable_wtsr(info, true);
 	max8997_rtc_enable_smpl(info, true);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8997-rtc",
 					&max8997_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c
index 648fa362ec447..5849729f7d01d 100644
--- a/drivers/rtc/rtc-meson-vrtc.c
+++ b/drivers/rtc/rtc-meson-vrtc.c
@@ -74,7 +74,7 @@ static int meson_vrtc_probe(struct platform_device *pdev)
 	if (IS_ERR(vrtc->io_alarm))
 		return PTR_ERR(vrtc->io_alarm);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	platform_set_drvdata(pdev, vrtc);
 
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index 6003281316031..b90f8337a7e6d 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -303,7 +303,7 @@ static int mpc5121_rtc_probe(struct platform_device *op)
 		return PTR_ERR(rtc->regs);
 	}
 
-	device_init_wakeup(&op->dev, 1);
+	device_init_wakeup(&op->dev, true);
 
 	platform_set_drvdata(op, rtc);
 
diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c
index 152699219a2b9..6979d225a78e4 100644
--- a/drivers/rtc/rtc-mt6397.c
+++ b/drivers/rtc/rtc-mt6397.c
@@ -286,7 +286,7 @@ static int mtk_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	rtc->rtc_dev->ops = &mtk_rtc_ops;
 	rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900;
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
index 51029c5362441..c27ad626d09fc 100644
--- a/drivers/rtc/rtc-mv.c
+++ b/drivers/rtc/rtc-mv.c
@@ -264,7 +264,7 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
 	}
 
 	if (pdata->irq >= 0)
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 	else
 		clear_bit(RTC_FEATURE_ALARM, pdata->rtc->features);
 
@@ -287,7 +287,7 @@ static void __exit mv_rtc_remove(struct platform_device *pdev)
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
 	if (pdata->irq >= 0)
-		device_init_wakeup(&pdev->dev, 0);
+		device_init_wakeup(&pdev->dev, false);
 
 	if (!IS_ERR(pdata->clk))
 		clk_disable_unprepare(pdata->clk);
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index dbb935dbbd8ab..608db97d450c5 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -377,7 +377,7 @@ static int mxc_rtc_probe(struct platform_device *pdev)
 	}
 
 	if (pdata->irq >= 0) {
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 		ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq);
 		if (ret)
 			dev_err(&pdev->dev, "failed to enable irq wake\n");
diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
index 13c041bb79f16..570f27af4732e 100644
--- a/drivers/rtc/rtc-mxc_v2.c
+++ b/drivers/rtc/rtc-mxc_v2.c
@@ -302,7 +302,7 @@ static int mxc_rtc_probe(struct platform_device *pdev)
 	if (pdata->irq < 0)
 		return pdata->irq;
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 	ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq);
 	if (ret)
 		dev_err(&pdev->dev, "failed to enable irq wake\n");
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index c123778e2d9bc..0f90065e352cb 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -920,7 +920,7 @@ static void omap_rtc_remove(struct platform_device *pdev)
 		omap_rtc_power_off_rtc = NULL;
 	}
 
-	device_init_wakeup(&pdev->dev, 0);
+	device_init_wakeup(&pdev->dev, false);
 
 	if (!IS_ERR(rtc->clk))
 		clk_disable_unprepare(rtc->clk);
diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c
index 7256a88b490c9..aecada6bcf8b5 100644
--- a/drivers/rtc/rtc-palmas.c
+++ b/drivers/rtc/rtc-palmas.c
@@ -287,7 +287,7 @@ static int palmas_rtc_probe(struct platform_device *pdev)
 
 	palmas_rtc->irq = platform_get_irq(pdev, 0);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 	palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
 				&palmas_rtc_ops, THIS_MODULE);
 	if (IS_ERR(palmas_rtc->rtc)) {
diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c
index bed3c27e665f3..2812da2c50c51 100644
--- a/drivers/rtc/rtc-pic32.c
+++ b/drivers/rtc/rtc-pic32.c
@@ -330,7 +330,7 @@ static int pic32_rtc_probe(struct platform_device *pdev)
 
 	pic32_rtc_enable(pdata, 1);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	pdata->rtc->ops = &pic32_rtcops;
 	pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c
index 2f32187ecc8d3..b2518aea4218f 100644
--- a/drivers/rtc/rtc-pm8xxx.c
+++ b/drivers/rtc/rtc-pm8xxx.c
@@ -503,7 +503,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, rtc_dd);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(rtc_dd->rtc))
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 34d8545c8e155..62ee6b8f9bcd6 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -360,7 +360,7 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	device_init_wakeup(dev, 1);
+	device_init_wakeup(dev, true);
 
 	return 0;
 }
diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c
index eecb49bab56ad..8ba9cda74acf1 100644
--- a/drivers/rtc/rtc-rc5t583.c
+++ b/drivers/rtc/rtc-rc5t583.c
@@ -245,7 +245,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "IRQ is not free.\n");
 		return ret;
 	}
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	ricoh_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
 		&rc5t583_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-rc5t619.c b/drivers/rtc/rtc-rc5t619.c
index 711f62eecd798..74d1691020741 100644
--- a/drivers/rtc/rtc-rc5t619.c
+++ b/drivers/rtc/rtc-rc5t619.c
@@ -414,7 +414,7 @@ static int rc5t619_rtc_probe(struct platform_device *pdev)
 
 		} else {
 			/* enable wake */
-			device_init_wakeup(&pdev->dev, 1);
+			device_init_wakeup(&pdev->dev, true);
 			enable_irq_wake(rtc->irq);
 		}
 	} else {
diff --git a/drivers/rtc/rtc-renesas-rtca3.c b/drivers/rtc/rtc-renesas-rtca3.c
index d127933bfc8ad..a056291d38876 100644
--- a/drivers/rtc/rtc-renesas-rtca3.c
+++ b/drivers/rtc/rtc-renesas-rtca3.c
@@ -768,7 +768,7 @@ static int rtca3_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	priv->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(priv->rtc_dev))
diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c
index 2d9bcb3ce1e3b..59b8e9a30fe67 100644
--- a/drivers/rtc/rtc-rk808.c
+++ b/drivers/rtc/rtc-rk808.c
@@ -418,7 +418,7 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(rk808_rtc->rtc))
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index c0ac3bdb2f427..58c957eb753d8 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -456,7 +456,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 	dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n",
 		readw(info->base + S3C2410_RTCCON));
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	info->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(info->rtc)) {
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index dad294a0ce2aa..36acca5b2639e 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -729,7 +729,7 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 				info->irq, ret);
 			return ret;
 		}
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 	}
 
 	return devm_rtc_register_device(info->rtc_dev);
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 13799b1abca1a..1ad93648d69c0 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -292,7 +292,7 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, info);
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	return sa1100_rtc_init(pdev, info);
 }
diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c
index ce7a2ddbbc16b..2b83561d4d280 100644
--- a/drivers/rtc/rtc-sc27xx.c
+++ b/drivers/rtc/rtc-sc27xx.c
@@ -613,14 +613,14 @@ static int sprd_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	rtc->rtc->ops = &sprd_rtc_ops;
 	rtc->rtc->range_min = 0;
 	rtc->rtc->range_max = 5662310399LL;
 	ret = devm_rtc_register_device(rtc->rtc);
 	if (ret) {
-		device_init_wakeup(&pdev->dev, 0);
+		device_init_wakeup(&pdev->dev, false);
 		return ret;
 	}
 
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index a5df521876ba0..9ea40f40188f3 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -611,7 +611,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_unmap;
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 	return 0;
 
 err_unmap:
diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index 26eed927f8b31..959acff8faff0 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -395,7 +395,7 @@ static int spear_rtc_probe(struct platform_device *pdev)
 		goto err_disable_clock;
 
 	if (!device_can_wakeup(&pdev->dev))
-		device_init_wakeup(&pdev->dev, 1);
+		device_init_wakeup(&pdev->dev, true);
 
 	return 0;
 
@@ -411,7 +411,7 @@ static void spear_rtc_remove(struct platform_device *pdev)
 
 	spear_rtc_disable_interrupt(config);
 	clk_disable_unprepare(config->clk);
-	device_init_wakeup(&pdev->dev, 0);
+	device_init_wakeup(&pdev->dev, false);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
index e681c1745866e..e5e6013d080e6 100644
--- a/drivers/rtc/rtc-sun6i.c
+++ b/drivers/rtc/rtc-sun6i.c
@@ -826,7 +826,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
 
 	clk_prepare_enable(chip->losc);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	chip->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(chip->rtc))
diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c
index 9b1ce0e8ba27e..519a06e728d6c 100644
--- a/drivers/rtc/rtc-sunplus.c
+++ b/drivers/rtc/rtc-sunplus.c
@@ -269,7 +269,7 @@ static int sp_rtc_probe(struct platform_device *plat_dev)
 	if (ret)
 		goto free_reset_assert;
 
-	device_init_wakeup(&plat_dev->dev, 1);
+	device_init_wakeup(&plat_dev->dev, true);
 	dev_set_drvdata(&plat_dev->dev, sp_rtc);
 
 	sp_rtc->rtc = devm_rtc_allocate_device(&plat_dev->dev);
@@ -307,7 +307,7 @@ static void sp_rtc_remove(struct platform_device *plat_dev)
 {
 	struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev);
 
-	device_init_wakeup(&plat_dev->dev, 0);
+	device_init_wakeup(&plat_dev->dev, false);
 	reset_control_assert(sp_rtc->rstc);
 	clk_disable_unprepare(sp_rtc->rtcclk);
 }
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index 79a3102c83549..46788db899533 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -319,7 +319,7 @@ static int tegra_rtc_probe(struct platform_device *pdev)
 	writel(0xffffffff, info->base + TEGRA_RTC_REG_INTR_STATUS);
 	writel(0, info->base + TEGRA_RTC_REG_INTR_MASK);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	ret = devm_request_irq(&pdev->dev, info->irq, tegra_rtc_irq_handler,
 			       IRQF_TRIGGER_HIGH, dev_name(&pdev->dev),
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index 7e0d8fb264655..a68b8c8841023 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -132,7 +132,7 @@ static int test_probe(struct platform_device *plat_dev)
 		break;
 	default:
 		rtd->rtc->ops = &test_rtc_ops;
-		device_init_wakeup(&plat_dev->dev, 1);
+		device_init_wakeup(&plat_dev->dev, true);
 	}
 
 	timer_setup(&rtd->alarm, test_rtc_alarm_handler, 0);
diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c
index e796729fc817c..54c8429b16bfc 100644
--- a/drivers/rtc/rtc-tps6586x.c
+++ b/drivers/rtc/rtc-tps6586x.c
@@ -241,7 +241,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	platform_set_drvdata(pdev, rtc);
 	rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c
index 2ea1bbfbbc2aa..284aa2f0392b3 100644
--- a/drivers/rtc/rtc-tps65910.c
+++ b/drivers/rtc/rtc-tps65910.c
@@ -418,7 +418,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
 	tps_rtc->irq = irq;
 	if (irq != -1) {
 		if (device_property_present(tps65910->dev, "wakeup-source"))
-			device_init_wakeup(&pdev->dev, 1);
+			device_init_wakeup(&pdev->dev, true);
 		else
 			device_set_wakeup_capable(&pdev->dev, 1);
 	} else {
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 794429182b348..e6106e67e1f40 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -572,7 +572,7 @@ static int twl_rtc_probe(struct platform_device *pdev)
 		return ret;
 
 	platform_set_drvdata(pdev, twl_rtc);
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
 					&twl_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 640833e210575..218316be942ae 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -420,7 +420,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev)
 	if (ret & WM831X_RTC_ALM_ENA)
 		wm831x_rtc->alarm_enabled = 1;
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	wm831x_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(wm831x_rtc->rtc))
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 6797eb4d2e493..3bd60d067a5ee 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -420,7 +420,7 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350",
 					&wm8350_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c
index 0813ea1a03c27..6660b664e8dd3 100644
--- a/drivers/rtc/rtc-xgene.c
+++ b/drivers/rtc/rtc-xgene.c
@@ -174,7 +174,7 @@ static int xgene_rtc_probe(struct platform_device *pdev)
 	/* Turn on the clock and the crystal */
 	writel(RTC_CCR_EN, pdata->csr_base + RTC_CCR);
 
-	ret = device_init_wakeup(&pdev->dev, 1);
+	ret = device_init_wakeup(&pdev->dev, true);
 	if (ret) {
 		clk_disable_unprepare(pdata->clk);
 		return ret;
@@ -197,7 +197,7 @@ static void xgene_rtc_remove(struct platform_device *pdev)
 	struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev);
 
 	xgene_rtc_alarm_irq_enable(&pdev->dev, 0);
-	device_init_wakeup(&pdev->dev, 0);
+	device_init_wakeup(&pdev->dev, false);
 	clk_disable_unprepare(pdata->clk);
 }
 
diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c
index af1abb69d1e32..625f708a7cafc 100644
--- a/drivers/rtc/rtc-zynqmp.c
+++ b/drivers/rtc/rtc-zynqmp.c
@@ -337,7 +337,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev)
 
 	xlnx_init_rtc(xrtcdev);
 
-	device_init_wakeup(&pdev->dev, 1);
+	device_init_wakeup(&pdev->dev, true);
 
 	return devm_rtc_register_device(xrtcdev->rtc);
 }
@@ -345,7 +345,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev)
 static void xlnx_rtc_remove(struct platform_device *pdev)
 {
 	xlnx_rtc_alarm_irq_enable(&pdev->dev, 0);
-	device_init_wakeup(&pdev->dev, 0);
+	device_init_wakeup(&pdev->dev, false);
 }
 
 static int __maybe_unused xlnx_rtc_suspend(struct device *dev)

From aa36314c2d08c47fe835e60c6cf72bf5d0ed4d68 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:19:37 +0800
Subject: [PATCH 021/368] um: Remove unused MODULES_LEN macro

It's not used anywhere. Its definition is also wrong: it computes
MODULES_VADDR - MODULES_END, i.e. the start of the range minus its end.

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128081939.2216246-2-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/include/asm/pgtable.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 0bd60afcc37d5..80338fca160dc 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -52,7 +52,6 @@ extern unsigned long end_iomem;
 #define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #define MODULES_VADDR	VMALLOC_START
 #define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

From 5bfc4a3a0af3dcf53516e5f0dd9a2649bcd05bad Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:19:38 +0800
Subject: [PATCH 022/368] um: Remove obsolete fixmap support

It was added to support highmem. But since the highmem support has
been removed by commit a98a6d864d3b ("um: Remove broken highmem
support"), it is no longer needed. Remove it to simplify the code.

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128081939.2216246-3-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/include/asm/fixmap.h  | 56 -----------------------------------
 arch/um/include/asm/pgtable.h |  5 +---
 arch/um/kernel/mem.c          | 15 +++-------
 3 files changed, 5 insertions(+), 71 deletions(-)
 delete mode 100644 arch/um/include/asm/fixmap.h

diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h
deleted file mode 100644
index 2efac58271880..0000000000000
--- a/arch/um/include/asm/fixmap.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_FIXMAP_H
-#define __UM_FIXMAP_H
-
-#include <asm/processor.h>
-#include <asm/archparam.h>
-#include <asm/page.h>
-#include <linux/threads.h>
-
-/*
- * Here we define all the compile-time 'special' virtual
- * addresses. The point is to have a constant address at
- * compile time, but to set the physical address only
- * in the boot process. We allocate these special  addresses
- * from the end of virtual memory (0xfffff000) backwards.
- * Also this lets us do fail-safe vmalloc(), we
- * can guarantee that these special addresses and
- * vmalloc()-ed addresses never overlap.
- *
- * these 'compile-time allocated' memory buffers are
- * fixed-size 4k pages. (or larger if used with an increment
- * highger than 1) use fixmap_set(idx,phys) to associate
- * physical memory with fixmap indices.
- *
- * TLB entries of such buffers will not be flushed across
- * task switches.
- */
-
-/*
- * on UP currently we will have no trace of the fixmap mechanizm,
- * no page table allocations, etc. This might change in the
- * future, say framebuffers for the console driver(s) could be
- * fix-mapped?
- */
-enum fixed_addresses {
-	__end_of_fixed_addresses
-};
-
-extern void __set_fixmap (enum fixed_addresses idx,
-			  unsigned long phys, pgprot_t flags);
-
-/*
- * used by vmalloc.c.
- *
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of mem..
- */
-
-#define FIXADDR_TOP	(TASK_SIZE - 2 * PAGE_SIZE)
-#define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
-
-#include <asm-generic/fixmap.h>
-
-#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 80338fca160dc..9be6daca95be4 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -8,8 +8,6 @@
 #ifndef __UM_PGTABLE_H
 #define __UM_PGTABLE_H
 
-#include <asm/fixmap.h>
-
 #define _PAGE_PRESENT	0x001
 #define _PAGE_NEEDSYNC	0x002
 #define _PAGE_RW	0x020
@@ -48,8 +46,7 @@ extern unsigned long end_iomem;
 
 #define VMALLOC_OFFSET	(__va_space)
 #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
-#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK)
-#define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
+#define VMALLOC_END	(TASK_SIZE-2*PAGE_SIZE)
 #define MODULES_VADDR	VMALLOC_START
 #define MODULES_END	VMALLOC_END
 
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 53248ed04771d..8a0e74ad00d11 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -9,7 +9,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
-#include <asm/fixmap.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <as-layout.h>
@@ -74,6 +73,7 @@ void __init mem_init(void)
 	kmalloc_ok = 1;
 }
 
+#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
 /*
  * Create a page table and place a pointer to it in a middle page
  * directory entry.
@@ -152,7 +152,6 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 
 static void __init fixaddr_user_init( void)
 {
-#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
 	long size = FIXADDR_USER_END - FIXADDR_USER_START;
 	pte_t *pte;
 	phys_t p;
@@ -174,13 +173,12 @@ static void __init fixaddr_user_init( void)
 		pte = virt_to_kpte(vaddr);
 		pte_set_val(*pte, p, PAGE_READONLY);
 	}
-#endif
 }
+#endif
 
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
-	unsigned long vaddr;
 
 	empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
 							       PAGE_SIZE);
@@ -191,14 +189,9 @@ void __init paging_init(void)
 	max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
 	free_area_init(max_zone_pfn);
 
-	/*
-	 * Fixed mappings, only the page table structure has to be
-	 * created - mappings will be set by set_fixmap():
-	 */
-	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir);
-
+#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
 	fixaddr_user_init();
+#endif
 }
 
 /*

From 06e0e6295957592cfceaa21124a49b55c98470c9 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:19:39 +0800
Subject: [PATCH 023/368] um: Count iomem_size only once in physmem calculation

When calculating max_physmem, we've already factored in the space
used by iomem. We don't need to subtract it again.

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128081939.2216246-4-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/kernel/um_arch.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8037a967225d8..c82e26baefc4f 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -376,9 +376,8 @@ int __init linux_main(int argc, char **argv, char **envp)
 	iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
 
 	max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
-
-	if (physmem_size + iomem_size > max_physmem) {
-		physmem_size = max_physmem - iomem_size;
+	if (physmem_size > max_physmem) {
+		physmem_size = max_physmem;
 		os_info("Physical memory size shrunk to %llu bytes\n",
 			physmem_size);
 	}

From 70240b5d1f5be2a9d37e43e6dc355d07681d2bbc Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:29 +0800
Subject: [PATCH 024/368] um: Mark parse_host_cpu_flags as __init

It's only invoked during boot from get_host_cpu_features().

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-2-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/kernel/um_arch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index c82e26baefc4f..d1d438016a14d 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -264,7 +264,7 @@ EXPORT_SYMBOL(end_iomem);
 
 #define MIN_VMALLOC (32 * 1024 * 1024)
 
-static void parse_host_cpu_flags(char *line)
+static void __init parse_host_cpu_flags(char *line)
 {
 	int i;
 	for (i = 0; i < 32*NCAPINTS; i++) {

From bcd89fd8f5f6caf440a52aa3822316f12bc0732d Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:30 +0800
Subject: [PATCH 025/368] um: Mark parse_cache_line as __init

It's only invoked during boot from get_host_cpu_features().

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-3-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/kernel/um_arch.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index d1d438016a14d..c48cef206828f 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -272,7 +272,8 @@ static void __init parse_host_cpu_flags(char *line)
 			set_cpu_cap(&boot_cpu_data, i);
 	}
 }
-static void parse_cache_line(char *line)
+
+static void __init parse_cache_line(char *line)
 {
 	long res;
 	char *to_parse = strstr(line, ":");

From ae62ae7994a0f453b1566d04ec090cf1bd534675 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:31 +0800
Subject: [PATCH 026/368] um: Mark get_top_address as __init

It's only invoked during boot from linux_main().

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-4-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/kernel/um_arch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index c48cef206828f..79ea97d4797ec 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -289,7 +289,7 @@ static void __init parse_cache_line(char *line)
 	}
 }
 
-static unsigned long get_top_address(char **envp)
+static unsigned long __init get_top_address(char **envp)
 {
 	unsigned long top_addr = (unsigned long) &top_addr;
 	int i;

From f0c76bc82d4544bde874497e830b2c9fa04bafe1 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:32 +0800
Subject: [PATCH 027/368] um: Mark set_stklim as __init

It's only invoked during boot from main().

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-5-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/os-Linux/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 0afcdeb8995b7..cf9697aa17f32 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -25,7 +25,7 @@
 
 long elf_aux_hwcap;
 
-static void set_stklim(void)
+static void __init set_stklim(void)
 {
 	struct rlimit lim;
 

From c2fdfd779717bf1d4bb569c7cb64502143144cad Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:33 +0800
Subject: [PATCH 028/368] um: Mark install_fatal_handler as __init

It's only invoked during boot from main().

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-6-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/os-Linux/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index cf9697aa17f32..3f394f25e0592 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -48,7 +48,7 @@ static void last_ditch_exit(int sig)
 	exit(1);
 }
 
-static void install_fatal_handler(int sig)
+static void __init install_fatal_handler(int sig)
 {
 	struct sigaction action;
 

From 0c5258efd69b73bd12f3e68cfe72e1396fdb857c Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:34 +0800
Subject: [PATCH 029/368] um: Mark setup_env_path as __init

It's only invoked during boot from main().

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-7-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/os-Linux/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 3f394f25e0592..3f0be24b3633b 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -73,7 +73,7 @@ static void __init install_fatal_handler(int sig)
 
 #define UML_LIB_PATH	":" OS_LIB_PATH "/uml"
 
-static void setup_env_path(void)
+static void __init setup_env_path(void)
 {
 	char *new_path = NULL;
 	char *old_path = NULL;

From 3c68810e150b80b9282e854142922b39489a54a5 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:35 +0800
Subject: [PATCH 030/368] um: Remove unused PGD_BOUND macro

It's no longer used since commit 11100b1dfb6e ("uml: delete
unused code").

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-8-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/os-Linux/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 3f0be24b3633b..13f3bfeb63785 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -19,7 +19,6 @@
 #include <um_malloc.h>
 #include "internal.h"
 
-#define PGD_BOUND (4 * 1024 * 1024)
 #define STACKSIZE (8 * 1024 * 1024)
 #define THREAD_NAME_LEN (256)
 

From c5e78b8d408add9d3562c7c44a727336ecb5d48b Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:36 +0800
Subject: [PATCH 031/368] um: Remove unused THREAD_NAME_LEN macro

It's no longer used since commit 42fda66387da ("uml: throw out
CONFIG_MODE_TT").

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-9-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/os-Linux/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 13f3bfeb63785..3c63ce19e3bf5 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -20,7 +20,6 @@
 #include "internal.h"
 
 #define STACKSIZE (8 * 1024 * 1024)
-#define THREAD_NAME_LEN (256)
 
 long elf_aux_hwcap;
 

From 7ee1e43a5f493a1332af3ac668cc2a87515c1622 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Thu, 28 Nov 2024 16:31:37 +0800
Subject: [PATCH 032/368] um: Remove unused user_context function

It's no longer used since commit 6aa802ce6acc ("uml: throw out
CHOOSE_MODE").

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241128083137.2219830-10-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/kernel/process.c           | 8 --------
 arch/x86/um/shared/sysdep/ptrace.h | 2 --
 2 files changed, 10 deletions(-)

diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 30bdc0a87dc85..e5a2d4d897e0c 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -213,14 +213,6 @@ int __uml_cant_sleep(void) {
 	/* Is in_interrupt() really needed? */
 }
 
-int user_context(unsigned long sp)
-{
-	unsigned long stack;
-
-	stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
-	return stack != (unsigned long) current_thread_info();
-}
-
 extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
 
 void do_uml_exitcalls(void)
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 2dd4ca6713f8b..8f7476ff6e95d 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -74,8 +74,6 @@ struct uml_pt_regs {
 #define UPT_FAULTINFO(r) (&(r)->faultinfo)
 #define UPT_IS_USER(r) ((r)->is_user)
 
-extern int user_context(unsigned long sp);
-
 extern int arch_init_registers(int pid);
 
 #endif /* __SYSDEP_X86_PTRACE_H */

From 579e7fd383ff3f7a4f685489f8fe18cfd8659074 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Tue, 17 Dec 2024 21:49:06 +0100
Subject: [PATCH 033/368] um: rtc: use RTC time when calculating the alarm

The kernel realtime and the current RTC time may have a (small) offset.
Should the kernel time be slightly in the future, then the timeout is
zero. This is problematic in time-travel mode, as a zero timeout can be
correctly configured and time never advances.

Replace the kernel realtime read with a read of the actual persistent
RTC clock. Also, for time-travel, calculate the exact nanoseconds needed
for the clock to advance.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Co-developed-by: Avraham Stern <avraham.stern@intel.com>
Link: https://patch.msgid.link/20241217204906.1408011-1-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/drivers/rtc_kern.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c
index 134a58f93c859..9158c936c1281 100644
--- a/arch/um/drivers/rtc_kern.c
+++ b/arch/um/drivers/rtc_kern.c
@@ -51,6 +51,7 @@ static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
 {
+	struct timespec64 ts;
 	unsigned long long secs;
 
 	if (!enable && !uml_rtc_alarm_enabled)
@@ -58,7 +59,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
 
 	uml_rtc_alarm_enabled = enable;
 
-	secs = uml_rtc_alarm_time - ktime_get_real_seconds();
+	read_persistent_clock64(&ts);
+	secs = uml_rtc_alarm_time - ts.tv_sec;
 
 	if (time_travel_mode == TT_MODE_OFF) {
 		if (!enable) {
@@ -73,7 +75,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
 
 		if (enable)
 			time_travel_add_event_rel(&uml_rtc_alarm_event,
-						  secs * NSEC_PER_SEC);
+						  secs * NSEC_PER_SEC -
+						  ts.tv_nsec);
 	}
 
 	return 0;

From af10dd16655339a48178221fc4b73fbf171dd739 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 20 Dec 2024 05:14:58 +0000
Subject: [PATCH 034/368] hostfs: Convert to writepages

If we add a migrate_folio operation, we can convert the writepage
operation to writepages.  The large folio support here is illusory;
we would need to kmap each page in turn for proper support.  But we do
remove a few hidden calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://patch.msgid.link/20241220051500.1919389-1-willy@infradead.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 fs/hostfs/hostfs_kern.c | 54 +++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7e51d2cec64b4..844c452534525 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -410,38 +410,33 @@ static const struct file_operations hostfs_dir_fops = {
 	.fsync		= hostfs_fsync,
 };
 
-static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
+static int hostfs_writepages(struct address_space *mapping,
+		struct writeback_control *wbc)
 {
-	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	char *buffer;
-	loff_t base = page_offset(page);
-	int count = PAGE_SIZE;
-	int end_index = inode->i_size >> PAGE_SHIFT;
-	int err;
-
-	if (page->index >= end_index)
-		count = inode->i_size & (PAGE_SIZE-1);
-
-	buffer = kmap_local_page(page);
-
-	err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
-	if (err != count) {
-		if (err >= 0)
-			err = -EIO;
-		mapping_set_error(mapping, err);
-		goto out;
+	struct folio *folio = NULL;
+	loff_t i_size = i_size_read(inode);
+	int err = 0;
+
+	while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
+		loff_t pos = folio_pos(folio);
+		size_t count = folio_size(folio);
+		char *buffer;
+		int ret;
+
+		if (count > i_size - pos)
+			count = i_size - pos;
+
+		buffer = kmap_local_folio(folio, 0);
+		ret = write_file(HOSTFS_I(inode)->fd, &pos, buffer, count);
+		kunmap_local(buffer);
+		folio_unlock(folio);
+		if (ret != count) {
+			err = ret < 0 ? ret : -EIO;
+			mapping_set_error(mapping, err);
+		}
 	}
 
-	if (base > inode->i_size)
-		inode->i_size = base;
-
-	err = 0;
-
- out:
-	kunmap_local(buffer);
-	unlock_page(page);
-
 	return err;
 }
 
@@ -506,11 +501,12 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
 }
 
 static const struct address_space_operations hostfs_aops = {
-	.writepage 	= hostfs_writepage,
+	.writepages 	= hostfs_writepages,
 	.read_folio	= hostfs_read_folio,
 	.dirty_folio	= filemap_dirty_folio,
 	.write_begin	= hostfs_write_begin,
 	.write_end	= hostfs_write_end,
+	.migrate_folio	= filemap_migrate_folio,
 };
 
 static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)

From d9ecb92b4fbbbb0a9993017da8d044541ca35886 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 3 Dec 2024 22:59:58 +0900
Subject: [PATCH 035/368] kbuild: deb-pkg: do not include empty hook
 directories

The linux-image package currently includes empty hook directories
(/etc/kernel/{pre,post}{inst,rm}.d/ by default).

These directories were perhaps intended as a fail-safe in case no
hook scripts exist there.

However, they are really unnecessary because the run-parts command is
already guarded by the following check:

    test -d ${debhookdir}/${script}.d && run-parts ...

The only difference is that the run-parts command either runs for empty
directories (resulting in a no-op) or is skipped entirely.

The maintainer scripts will succeed without these dummy directories.

The linux-image packages from the Debian kernel do not contain
/etc/kernel/*.d/, either.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/builddeb | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index ad7aba0f268e1..85fe8f56bb9ba 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -76,8 +76,6 @@ install_maint_scripts () {
 	# so do we; recent versions of dracut and initramfs-tools will obey this.
 	debhookdir=${KDEB_HOOKDIR:-/etc/kernel}
 	for script in postinst postrm preinst prerm; do
-		mkdir -p "${pdir}${debhookdir}/${script}.d"
-
 		mkdir -p "${pdir}/DEBIAN"
 		cat <<-EOF > "${pdir}/DEBIAN/${script}"
 		#!/bin/sh

From ac2c30f98f28a6606af89ce44bff77af5d558fe8 Mon Sep 17 00:00:00 2001
From: Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
Date: Tue, 3 Dec 2024 17:17:35 +0100
Subject: [PATCH 036/368] kbuild: deb-pkg: allow hooks also in
 /usr/share/kernel

By passing an additional directory to run-parts, allow Debian and its
derivatives to ship maintainer scripts in /usr while at the same time
allowing the local admin to override or disable them by placing hooks of
the same name in /etc. This adds support for the mechanism described in
the UAPI Configuration Files Specification for kernel hooks. The same
idea is also used by udev, systemd or modprobe for their config files.
https://uapi-group.org/specifications/specs/configuration_files_specification/

This functionality relies on run-parts 5.21 or later.  It is the
responsibility of packages installing hooks into /usr/share/kernel to
also declare a Depends: debianutils (>= 5.21).

KDEB_HOOKDIR can be used to change the list of directories that is
searched. By default, /etc/kernel and /usr/share/kernel are hook
directories. Since the list of directories in KDEB_HOOKDIR is separated
by spaces, the paths must not contain the space character themselves.

Signed-off-by: Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/builddeb | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 85fe8f56bb9ba..3627ca227e5a5 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -5,10 +5,12 @@
 #
 # Simple script to generate a deb package for a Linux kernel. All the
 # complexity of what to do with a kernel after it is installed or removed
-# is left to other scripts and packages: they can install scripts in the
-# /etc/kernel/{pre,post}{inst,rm}.d/ directories (or an alternative location
-# specified in KDEB_HOOKDIR) that will be called on package install and
-# removal.
+# is left to other scripts and packages. Scripts can be placed into the
+# preinst, postinst, prerm and postrm directories in /etc/kernel or
+# /usr/share/kernel. A different list of search directories can be given
+# via KDEB_HOOKDIR. Scripts in directories earlier in the list will
+# override scripts of the same name in later directories.  The script will
+# be called on package installation and removal.
 
 set -eu
 
@@ -74,7 +76,7 @@ install_maint_scripts () {
 	# kernel packages, as well as kernel packages built using make-kpkg.
 	# make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and
 	# so do we; recent versions of dracut and initramfs-tools will obey this.
-	debhookdir=${KDEB_HOOKDIR:-/etc/kernel}
+	debhookdir=${KDEB_HOOKDIR:-/etc/kernel /usr/share/kernel}
 	for script in postinst postrm preinst prerm; do
 		mkdir -p "${pdir}/DEBIAN"
 		cat <<-EOF > "${pdir}/DEBIAN/${script}"
@@ -88,7 +90,15 @@ install_maint_scripts () {
 		# Tell initramfs builder whether it's wanted
 		export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No)
 
-		test -d ${debhookdir}/${script}.d && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" ${debhookdir}/${script}.d
+		# run-parts will error out if one of its directory arguments does not
+		# exist, so filter the list of hook directories accordingly.
+		hookdirs=
+		for dir in ${debhookdir}; do
+			test -d "\$dir/${script}.d" || continue
+			hookdirs="\$hookdirs \$dir/${script}.d"
+		done
+		hookdirs="\${hookdirs# }"
+		test -n "\$hookdirs" && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" \$hookdirs
 		exit 0
 		EOF
 		chmod 755 "${pdir}/DEBIAN/${script}"

From 5f73e7d0386d970a7d0e9de5a58d53114de85033 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 10 Dec 2024 19:06:17 +0900
Subject: [PATCH 037/368] kbuild: refactor cross-compiling linux-headers
 package

Since commit 13b25489b6f8 ("kbuild: change working directory to external
module directory with M="), when cross-building host programs for the
linux-headers package, the "Entering directory" and "Leaving directory"
messages appear multiple times, and each object path shown is relative
to the working directory. This makes it difficult to track which objects
are being rebuilt.

In hindsight, using the external module build (M=) was not a good idea.

This commit simplifies the script by leveraging the run-command target,
resulting in a cleaner build log again.

[Before]

  $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- bindeb-pkg
    [ snip ]
  Rebuilding host programs with aarch64-linux-gnu-gcc...
  make[5]: Entering directory '/home/masahiro/linux'
  make[6]: Entering directory '/home/masahiro/linux/debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+'
    HOSTCC  scripts/kallsyms
    HOSTCC  scripts/sorttable
    HOSTCC  scripts/asn1_compiler
  make[6]: Leaving directory '/home/masahiro/linux/debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+'
  make[5]: Leaving directory '/home/masahiro/linux'
  make[5]: Entering directory '/home/masahiro/linux'
  make[6]: Entering directory '/home/masahiro/linux/debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+'
    HOSTCC  scripts/basic/fixdep
    HOSTCC  scripts/mod/modpost.o
    HOSTCC  scripts/mod/file2alias.o
    HOSTCC  scripts/mod/sumversion.o
    HOSTCC  scripts/mod/symsearch.o
    HOSTLD  scripts/mod/modpost
  make[6]: Leaving directory '/home/masahiro/linux/debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+'
  make[5]: Leaving directory '/home/masahiro/linux'

[After]

  $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- bindeb-pkg
    [ snip ]
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/basic/fixdep
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/kallsyms
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/sorttable
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/asn1_compiler
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/mod/modpost.o
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/mod/file2alias.o
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/mod/sumversion.o
    HOSTCC  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/mod/symsearch.o
    HOSTLD  debian/linux-headers-6.13.0-rc1+/usr/src/linux-headers-6.13.0-rc1+/scripts/mod/modpost

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/install-extmod-build | 33 ++++++++--------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build
index d3c5b104c0631..bb6e23c1174ec 100755
--- a/scripts/package/install-extmod-build
+++ b/scripts/package/install-extmod-build
@@ -49,17 +49,10 @@ mkdir -p "${destdir}"
 # This caters to host programs that participate in Kbuild. objtool and
 # resolve_btfids are out of scope.
 if [ "${CC}" != "${HOSTCC}" ]; then
-	echo "Rebuilding host programs with ${CC}..."
-
-	# This leverages external module building.
-	# - Clear sub_make_done to allow the top-level Makefile to redo sub-make.
-	# - Filter out --no-print-directory to print "Entering directory" logs
-	#   when Make changes the working directory.
-	unset sub_make_done
-	MAKEFLAGS=$(echo "${MAKEFLAGS}" | sed s/--no-print-directory//)
-
-	cat <<-'EOF' >  "${destdir}/Kbuild"
-	subdir-y := scripts
+	cat "${destdir}/scripts/Makefile" - <<-'EOF' > "${destdir}/scripts/Kbuild"
+	subdir-y += basic
+	hostprogs-always-y += mod/modpost
+	mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o)
 	EOF
 
 	# HOSTCXX is not overridden. The C++ compiler is used to build:
@@ -67,20 +60,12 @@ if [ "${CC}" != "${HOSTCC}" ]; then
 	# - GCC plugins, which will not work on the installed system even after
 	#   being rebuilt.
 	#
-	# Use the single-target build to avoid the modpost invocation, which
-	# would overwrite Module.symvers.
-	"${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/
-
-	cat <<-'EOF' >  "${destdir}/scripts/Kbuild"
-	subdir-y := basic
-	hostprogs-always-y := mod/modpost
-	mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o)
-	EOF
-
-	# Run once again to rebuild scripts/basic/ and scripts/mod/modpost.
-	"${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/
+	# Clear VPATH and srcroot because the source files reside in the output
+	# directory.
+	# shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make
+	"${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts
 
-	rm -f "${destdir}/Kbuild" "${destdir}/scripts/Kbuild"
+	rm -f "${destdir}/scripts/Kbuild"
 fi
 
 find "${destdir}" \( -name '.*.cmd' -o -name '*.o' \) -delete

From 1f937a4bcb0472015818f30f4d3c5546d3f09933 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 10 Dec 2024 19:24:41 +0900
Subject: [PATCH 038/368] kbuild: suppress stdout from merge_config for silent
 builds

merge_config does not respect Make's -s (--silent) option.

Let's sink the stdout from merge_config for silent builds.

This commit does not cater to the direct invocation of merge_config.sh
(e.g. arch/mips/Makefile).

Reported-by: Leon Romanovsky <leon@kernel.org>
Closes: https://lore.kernel.org/all/e534ce33b0e1060eb85ece8429810f087b034c88.1733234008.git.leonro@nvidia.com/
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Tested-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
---
 scripts/Makefile.defconf | 13 +++++++------
 scripts/kconfig/Makefile |  4 +++-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf
index 226ea3df3b4b4..a44307f08e9d6 100644
--- a/scripts/Makefile.defconf
+++ b/scripts/Makefile.defconf
@@ -1,6 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 # Configuration heplers
 
+cmd_merge_fragments = \
+	$(srctree)/scripts/kconfig/merge_config.sh \
+	$4 -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$2 \
+	$(foreach config,$3,$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
+
 # Creates 'merged defconfigs'
 # ---------------------------------------------------------------------------
 # Usage:
@@ -8,9 +13,7 @@
 #
 # Input config fragments without '.config' suffix
 define merge_into_defconfig
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
-		-m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
-		$(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
+	$(call cmd,merge_fragments,$1,$2)
 	+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 endef
 
@@ -22,8 +25,6 @@ endef
 #
 # Input config fragments without '.config' suffix
 define merge_into_defconfig_override
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
-		-Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
-		$(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
+	$(call cmd,merge_fragments,$1,$2,-Q)
 	+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 endef
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index a0a0be38cbdc1..fb50bd4f4103f 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -105,9 +105,11 @@ configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARC
 all-config-fragments = $(call configfiles,*.config)
 config-fragments = $(call configfiles,$@)
 
+cmd_merge_fragments = $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments)
+
 %.config: $(obj)/conf
 	$(if $(config-fragments),, $(error $@ fragment does not exists on this architecture))
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments)
+	$(call cmd,merge_fragments)
 	$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
 PHONY += tinyconfig

From 41e86fe7ebe9eaf18c394145cc91b08b5ec932a8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 18 Dec 2024 19:37:05 +0900
Subject: [PATCH 039/368] kheaders: exclude include/generated/utsversion.h from
 kheaders_data.tar.xz

CONFIG_IKHEADERS has a reproducibility issue because the contents of
kernel/kheaders_data.tar.xz can vary depending on how you build the
kernel.

If you build the kernel with CONFIG_IKHEADERS enabled from a pristine
state, the tarball does not include include/generated/utsversion.h.

  $ make -s mrproper
  $ make -s defconfig
  $ scripts/config -e CONFIG_IKHEADERS
  $ make -s
  $ tar Jtf kernel/kheaders_data.tar.xz | grep utsversion

However, if you build the kernel with CONFIG_IKHEADERS disabled first
and then enable it later, the tarball does include
include/generated/utsversion.h.

  $ make -s mrproper
  $ make -s defconfig
  $ make -s
  $ scripts/config -e CONFIG_IKHEADERS
  $ make -s
  $ tar Jtf kernel/kheaders_data.tar.xz | grep utsversion
  ./include/generated/utsversion.h

It is not predictable whether a stale include/generated/utsversion.h
remains when kheaders_data.tar.xz is generated.

For better reproducibility, include/generated/utsversion.h should
always be omitted. It is not necessary for the kheaders anyway.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/gen_kheaders.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 383fd43ac6122..a0e3fbf4afa42 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -83,6 +83,10 @@ for f in $dir_list;
 	do find "$f" -name "*.h";
 done | cpio --quiet -pdu $cpio_dir >/dev/null 2>&1
 
+# Always exclude include/generated/utsversion.h
+# Otherwise, the contents of the tarball may vary depending on the build steps.
+rm -f "${cpio_dir}/include/generated/utsversion.h"
+
 # Remove comments except SDPX lines
 find $cpio_dir -type f -print0 |
 	xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'

From de0cae9273841ca019e438192d08b7358a002973 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 18 Dec 2024 19:37:06 +0900
Subject: [PATCH 040/368] kheaders: avoid unnecessary process forks of grep

Exclude include/generated/{utsversion.h,autoconf.h} by using the -path
option to reduce the cost of forking new processes.

No functional changes are intended.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/gen_kheaders.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index a0e3fbf4afa42..c2eba1a0d772e 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -48,9 +48,9 @@ all_dirs="$all_dirs $dir_list"
 # check include/generated/autoconf.h explicitly.
 #
 # Ignore them for md5 calculation to avoid pointless regeneration.
-headers_md5="$(find $all_dirs -name "*.h"			|
-		grep -v "include/generated/utsversion.h"	|
-		grep -v "include/generated/autoconf.h"	|
+headers_md5="$(find $all_dirs -name "*.h" -a			\
+		! -path include/generated/utsversion.h -a	\
+		! -path include/generated/autoconf.h		|
 		xargs ls -l | md5sum | cut -d ' ' -f1)"
 
 # Any changes to this script will also cause a rebuild of the archive.

From fd2a118c483472f8862cc46981a5230414cd0e67 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 18 Dec 2024 19:37:07 +0900
Subject: [PATCH 041/368] kheaders: rename the 'cpio_dir' variable to 'tmpdir'

The next commit will get rid of the use of 'cpio' command, as there is
no strong reason to use it just for copying files.

Before that, this commit renames the 'cpio_dir' variable to 'tmpdir'.

No functional changes are intended.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/gen_kheaders.sh | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index c2eba1a0d772e..ddfd1177567f4 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -7,7 +7,7 @@ set -e
 sfile="$(readlink -f "$0")"
 outdir="$(pwd)"
 tarfile=$1
-cpio_dir=$outdir/${tarfile%/*}/.tmp_cpio_dir
+tmpdir=$outdir/${tarfile%/*}/.tmp_dir
 
 dir_list="
 include/
@@ -65,15 +65,15 @@ fi
 
 echo "  GEN     $tarfile"
 
-rm -rf $cpio_dir
-mkdir $cpio_dir
+rm -rf "${tmpdir}"
+mkdir "${tmpdir}"
 
 if [ "$building_out_of_srctree" ]; then
 	(
 		cd $srctree
 		for f in $dir_list
 			do find "$f" -name "*.h";
-		done | cpio --quiet -pd $cpio_dir
+		done | cpio --quiet -pd "${tmpdir}"
 	)
 fi
 
@@ -81,23 +81,23 @@ fi
 # of tree builds having stale headers in srctree. Just silence CPIO for now.
 for f in $dir_list;
 	do find "$f" -name "*.h";
-done | cpio --quiet -pdu $cpio_dir >/dev/null 2>&1
+done | cpio --quiet -pdu "${tmpdir}" >/dev/null 2>&1
 
 # Always exclude include/generated/utsversion.h
 # Otherwise, the contents of the tarball may vary depending on the build steps.
-rm -f "${cpio_dir}/include/generated/utsversion.h"
+rm -f "${tmpdir}/include/generated/utsversion.h"
 
 # Remove comments except SDPX lines
-find $cpio_dir -type f -print0 |
+find "${tmpdir}" -type f -print0 |
 	xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
 
 # Create archive and try to normalize metadata for reproducibility.
 tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
     --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-    -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null
+    -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null
 
 echo $headers_md5 > kernel/kheaders.md5
 echo "$this_file_md5" >> kernel/kheaders.md5
 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5
 
-rm -rf $cpio_dir
+rm -rf "${tmpdir}"

From 82a1978d0fdc28e561bc4d98ea155dd322f33c19 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 18 Dec 2024 19:37:08 +0900
Subject: [PATCH 042/368] kheaders: use 'tar' instead of 'cpio' for copying
 files

The 'cpio' command is used solely for copying header files to the
temporary directory. However, there is no strong reason to use 'cpio'
for this purpose. For example, scripts/package/install-extmod-build
uses the 'tar' command to copy files.

This commit replaces the use of 'cpio' with 'tar' because 'tar' is
already used in this script to generate kheaders_data.tar.xz anyway.

Performance-wise, there is no significant difference between 'cpio'
and 'tar'.

[Before]

  $ rm -fr kheaders; mkdir kheaders
  $ time sh -c '
  for f in include arch/x86/include
  do
          find "$f" -name "*.h"
  done | cpio --quiet -pd kheaders
  '
  real    0m0.148s
  user    0m0.021s
  sys     0m0.140s

[After]

  $ rm -fr kheaders; mkdir kheaders
  $ time sh -c '
  for f in include arch/x86/include
  do
          find "$f" -name "*.h"
  done | tar -c -f - -T - | tar -xf - -C kheaders
  '
  real    0m0.098s
  user    0m0.024s
  sys     0m0.131s

Revert commit 69ef0920bdd3 ("Docs: Add cpio requirement to changes.rst")
because 'cpio' is not used anywhere else during the kernel build.
Please note that the built-in initramfs is created by the in-tree tool,
usr/gen_init_cpio, so it does not rely on the external 'cpio' command
at all.

Remove 'cpio' from the package build dependencies as well.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/process/changes.rst |  6 ------
 kernel/gen_kheaders.sh            | 13 ++-----------
 scripts/package/PKGBUILD          |  1 -
 scripts/package/mkdebian          |  2 +-
 4 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 82b5e378eebff..a0beca805362d 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -59,7 +59,6 @@ iptables               1.4.2            iptables -V
 openssl & libcrypto    1.0.0            openssl version
 bc                     1.06.95          bc --version
 Sphinx\ [#f1]_         2.4.4            sphinx-build --version
-cpio                   any              cpio --version
 GNU tar                1.28             tar --version
 gtags (optional)       6.6.5            gtags --version
 mkimage (optional)     2017.01          mkimage --version
@@ -536,11 +535,6 @@ mcelog
 
 - <https://www.mcelog.org/>
 
-cpio
-----
-
-- <https://www.gnu.org/software/cpio/>
-
 Networking
 **********
 
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index ddfd1177567f4..55f493d83b8fa 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -14,13 +14,6 @@ include/
 arch/$SRCARCH/include/
 "
 
-if ! command -v cpio >/dev/null; then
-	echo >&2 "***"
-	echo >&2 "*** 'cpio' could not be found."
-	echo >&2 "***"
-	exit 1
-fi
-
 # Support incremental builds by skipping archive generation
 # if timestamps of files being archived are not changed.
 
@@ -73,15 +66,13 @@ if [ "$building_out_of_srctree" ]; then
 		cd $srctree
 		for f in $dir_list
 			do find "$f" -name "*.h";
-		done | cpio --quiet -pd "${tmpdir}"
+		done | tar -c -f - -T - | tar -xf - -C "${tmpdir}"
 	)
 fi
 
-# The second CPIO can complain if files already exist which can happen with out
-# of tree builds having stale headers in srctree. Just silence CPIO for now.
 for f in $dir_list;
 	do find "$f" -name "*.h";
-done | cpio --quiet -pdu "${tmpdir}" >/dev/null 2>&1
+done | tar -c -f - -T - | tar -xf - -C "${tmpdir}"
 
 # Always exclude include/generated/utsversion.h
 # Otherwise, the contents of the tarball may vary depending on the build steps.
diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD
index dca706617adc7..0cf3a55b05e14 100644
--- a/scripts/package/PKGBUILD
+++ b/scripts/package/PKGBUILD
@@ -22,7 +22,6 @@ license=(GPL-2.0-only)
 makedepends=(
 	bc
 	bison
-	cpio
 	flex
 	gettext
 	kmod
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index b038a1380b8af..b6dd98ca860b4 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -205,7 +205,7 @@ Priority: optional
 Maintainer: $maintainer
 Rules-Requires-Root: no
 Build-Depends: debhelper-compat (= 12)
-Build-Depends-Arch: bc, bison, cpio, flex,
+Build-Depends-Arch: bc, bison, flex,
  gcc-${host_gnu} <!pkg.${sourcename}.nokernelheaders>,
  kmod, libelf-dev:native,
  libssl-dev:native, libssl-dev <!pkg.${sourcename}.nokernelheaders>,

From 41a00051283e301f7e0009626ddf591542e30161 Mon Sep 17 00:00:00 2001
From: HONG Yifan <elsk@google.com>
Date: Wed, 18 Dec 2024 20:20:11 +0000
Subject: [PATCH 043/368] kheaders: prevent `find` from seeing perl temp files

Symptom:

The command

    find ... | xargs ... perl -i

occasionally triggers error messages like the following, with the build
still succeeding:

    Can't open <redacted>/kernel/.tmp_dir/include/dt-bindings/clock/XXNX4nW9: No such file or directory.

Analysis:

With strace, the root cause has been identified to be `perl -i` creating
temporary files inside ${tmpdir}, which causes `find` to see the
temporary files and emit the names. `find` is likely implemented with
readdir. POSIX `readdir` says:

    If a file is removed from or added to the directory after the most
    recent call to opendir() or rewinddir(), whether a subsequent call
    to readdir() returns an entry for that file is unspecified.

So if the libc that `find` links against chooses to return that entry
in readdir(), a possible sequence of events is the following:

1. find emits foo.h
2. xargs executes `perl -i foo.h`
3. perl (pid=100) creates temporary file `XXXXXXXX`
4. find sees file `XXXXXXXX` and emits it
5. PID 100 exits, cleaning up the temporary file `XXXXXXXX`
6. xargs executes `perl -i XXXXXXXX`
7. perl (pid=200) tries to read the file, but it doesn't exist any more.

... triggering the error message.

One can reproduce the bug with the following command (assuming PWD
contains the list of headers in kheaders.tar.xz)

    for i in $(seq 100); do
        find -type f -print0 |
            xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;';
    done

With a `find` linking against musl libc, the error message is emitted
6/100 times.

The fix:

This change stores the results of `find` before feeding them into xargs.
find and xargs will no longer be able to see temporary files that perl
creates after this change.

Signed-off-by: HONG Yifan <elsk@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/gen_kheaders.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 55f493d83b8fa..c9e5dc068e854 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -79,8 +79,13 @@ done | tar -c -f - -T - | tar -xf - -C "${tmpdir}"
 rm -f "${tmpdir}/include/generated/utsversion.h"
 
 # Remove comments except SDPX lines
-find "${tmpdir}" -type f -print0 |
-	xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
+# Use a temporary file to store directory contents to prevent find/xargs from
+# seeing temporary files created by perl.
+find "${tmpdir}" -type f -print0 > "${tmpdir}.contents.txt"
+xargs -0 -P8 -n1 \
+	perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' \
+	< "${tmpdir}.contents.txt"
+rm -f "${tmpdir}.contents.txt"
 
 # Create archive and try to normalize metadata for reproducibility.
 tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \

From ad2091dee019a68145610081a75fae3b90f0c44d Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Thu, 19 Dec 2024 08:20:34 +0100
Subject: [PATCH 044/368] kconfig: qconf: use preferred form of QString API

A QString constructed from a character literal of length 0, i.e. "", is not
"null" for historical reasons. This does not matter here so use the preferred
method isEmpty() instead.

Also directly construct empty QString objects instead of passing in an empty
character literal that has to be parsed into an empty object first.

Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Link: https://doc.qt.io/qt-6/qstring.html#distinction-between-null-and-empty-strings
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/qconf.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 6c92ef1e16efb..eaa465b0ccf9c 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -1464,8 +1464,8 @@ void ConfigMainWindow::loadConfig(void)
 {
 	QString str;
 
-	str = QFileDialog::getOpenFileName(this, "", configname);
-	if (str.isNull())
+	str = QFileDialog::getOpenFileName(this, QString(), configname);
+	if (str.isEmpty())
 		return;
 
 	if (conf_read(str.toLocal8Bit().constData()))
@@ -1491,8 +1491,8 @@ void ConfigMainWindow::saveConfigAs(void)
 {
 	QString str;
 
-	str = QFileDialog::getSaveFileName(this, "", configname);
-	if (str.isNull())
+	str = QFileDialog::getSaveFileName(this, QString(), configname);
+	if (str.isEmpty())
 		return;
 
 	if (conf_write(str.toLocal8Bit().constData())) {

From 1cd9502ee9275c6176a7312863f939cca9506114 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 29 Dec 2024 00:45:28 +0900
Subject: [PATCH 045/368] module: get symbol CRC back to unsigned

Commit 71810db27c1c ("modversions: treat symbol CRCs as 32 bit
quantities") changed the CRC fields to s32 because the __kcrctab and
__kcrctab_gpl sections contained relative references to the actual
CRC values stored in the .rodata section when CONFIG_MODULE_REL_CRCS=y.

Commit 7b4537199a4a ("kbuild: link symbol CRCs at final link, removing
CONFIG_MODULE_REL_CRCS") removed this complexity. Now, the __kcrctab
and __kcrctab_gpl sections directly contain the CRC values in all cases.

The genksyms tool outputs unsigned 32-bit CRC values, so u32 is preferred
over s32.

No functional changes are intended.

Regardless of this change, the CRC value is assigned to the u32 variable
'crcval' before the comparison, as seen in kernel/module/version.c:

    crcval = *crc;

It was previously mandatory (but now optional) in order to avoid sign
extension because the following line previously compared 'unsigned long'
and 's32':

    if (versions[i].crc == crcval)
            return 1;

versions[i].crc is still 'unsigned long' for backward compatibility.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
---
 include/linux/module.h   |  4 ++--
 kernel/module/internal.h | 10 +++++-----
 kernel/module/main.c     |  2 +-
 kernel/module/version.c  |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index 94acbacdcdf18..903ef8fe4c04d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -430,7 +430,7 @@ struct module {
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
-	const s32 *crcs;
+	const u32 *crcs;
 	unsigned int num_syms;
 
 #ifdef CONFIG_ARCH_USES_CFI_TRAPS
@@ -448,7 +448,7 @@ struct module {
 	/* GPL-only exported symbols. */
 	unsigned int num_gpl_syms;
 	const struct kernel_symbol *gpl_syms;
-	const s32 *gpl_crcs;
+	const u32 *gpl_crcs;
 	bool using_gplonly_symbols;
 
 #ifdef CONFIG_MODULE_SIG
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index daef2be839022..f10dc3ea7ff88 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -55,8 +55,8 @@ extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
 extern const struct kernel_symbol __start___ksymtab_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_gpl[];
-extern const s32 __start___kcrctab[];
-extern const s32 __start___kcrctab_gpl[];
+extern const u32 __start___kcrctab[];
+extern const u32 __start___kcrctab_gpl[];
 
 struct load_info {
 	const char *name;
@@ -102,7 +102,7 @@ struct find_symbol_arg {
 
 	/* Output */
 	struct module *owner;
-	const s32 *crc;
+	const u32 *crc;
 	const struct kernel_symbol *sym;
 	enum mod_license license;
 };
@@ -384,7 +384,7 @@ static inline void init_param_lock(struct module *mod) { }
 
 #ifdef CONFIG_MODVERSIONS
 int check_version(const struct load_info *info,
-		  const char *symname, struct module *mod, const s32 *crc);
+		  const char *symname, struct module *mod, const u32 *crc);
 void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp,
 		   struct kernel_symbol *ks, struct tracepoint * const *tp);
 int check_modstruct_version(const struct load_info *info, struct module *mod);
@@ -393,7 +393,7 @@ int same_magic(const char *amagic, const char *bmagic, bool has_crcs);
 static inline int check_version(const struct load_info *info,
 				const char *symname,
 				struct module *mod,
-				const s32 *crc)
+				const u32 *crc)
 {
 	return 1;
 }
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 5399c182b3cbe..e58bff88b8d63 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -86,7 +86,7 @@ struct mod_tree_root mod_tree __cacheline_aligned = {
 
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
-	const s32 *crcs;
+	const u32 *crcs;
 	enum mod_license license;
 };
 
diff --git a/kernel/module/version.c b/kernel/module/version.c
index 53f43ac5a73e9..4e5731d403af2 100644
--- a/kernel/module/version.c
+++ b/kernel/module/version.c
@@ -13,7 +13,7 @@
 int check_version(const struct load_info *info,
 		  const char *symname,
 			 struct module *mod,
-			 const s32 *crc)
+			 const u32 *crc)
 {
 	Elf_Shdr *sechdrs = info->sechdrs;
 	unsigned int versindex = info->index.vers;

From 5963913bb57f15f198361bc7f1389c756b98f25f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 29 Dec 2024 00:45:29 +0900
Subject: [PATCH 046/368] modpost: zero-pad CRC values in modversion_info array

I do not think the '#' flag is useful here because adding the explicit
'0x' is clearer. Add the '0' flag to zero-pad the CRC values.

This change gives better alignment in the generated *.mod.c files.
There is no impact to the compiled modules.

[Before]

  $ grep -A5 modversion_info fs/efivarfs/efivarfs.mod.c
  static const struct modversion_info ____versions[]
  __used __section("__versions") = {
          { 0x907d14d, "blocking_notifier_chain_register" },
          { 0x53d3b64, "simple_inode_init_ts" },
          { 0x65487097, "__x86_indirect_thunk_rax" },
          { 0x122c3a7e, "_printk" },

[After]

  $ grep -A5 modversion_info fs/efivarfs/efivarfs.mod.c
  static const struct modversion_info ____versions[]
  __used __section("__versions") = {
          { 0x0907d14d, "blocking_notifier_chain_register" },
          { 0x053d3b64, "simple_inode_init_ts" },
          { 0x65487097, "__x86_indirect_thunk_rax" },
          { 0x122c3a7e, "_printk" },

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/mod/modpost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 7ea59dc4926b3..dc907014108bb 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1832,7 +1832,7 @@ static void add_versions(struct buffer *b, struct module *mod)
 			      s->name, mod->name);
 			break;
 		}
-		buf_printf(b, "\t{ %#8x, \"%s\" },\n",
+		buf_printf(b, "\t{ 0x%08x, \"%s\" },\n",
 			   s->crc, s->name);
 	}
 

From 45c9c4101d3d2fdfa00852274bbebba65fcc3cf2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 3 Jan 2025 16:30:38 +0900
Subject: [PATCH 047/368] genksyms: fix memory leak when the same symbol is
 added from source

When a symbol that is already registered is added again, __add_symbol()
returns without freeing the symbol definition, making it unreachable.

The following test cases demonstrate different memory leak points.

[Test Case 1]

Forward declaration with exactly the same definition

  $ cat foo.c
  #include <linux/export.h>
  void foo(void);
  void foo(void) {}
  EXPORT_SYMBOL(foo);

[Test Case 2]

Forward declaration with a different definition (e.g. attribute)

  $ cat foo.c
  #include <linux/export.h>
  void foo(void);
  __attribute__((__section__(".ref.text"))) void foo(void) {}
  EXPORT_SYMBOL(foo);

[Test Case 3]

Preserving an overridden symbol (compile with KBUILD_PRESERVE=1)

  $ cat foo.c
  #include <linux/export.h>
  void foo(void);
  void foo(void) { }
  EXPORT_SYMBOL(foo);

  $ cat foo.symref
  override foo void foo ( int )

The memory leaks in Test Case 1 and 2 have existed since the introduction
of genksyms into the kernel tree. [1]

The memory leak in Test Case 3 was introduced by commit 5dae9a550a74
("genksyms: allow to ignore symbol checksum changes").

When multiple init_declarators are reduced to an init_declarator_list,
the decl_spec must be duplicated. Otherwise, the following Test Case 4
would result in a double-free bug.

[Test Case 4]

  $ cat foo.c
  #include <linux/export.h>

  extern int foo, bar;

  int foo, bar;
  EXPORT_SYMBOL(foo);

In this case, 'foo' and 'bar' share the same decl_spec, 'int'. It must
be unshared before being passed to add_symbol().

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/?id=46bd1da672d66ccd8a639d3c1f8a166048cca608

Fixes: 5dae9a550a74 ("genksyms: allow to ignore symbol checksum changes")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/genksyms/genksyms.c |  3 +++
 scripts/genksyms/parse.y    | 14 ++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 07f9b8cfb2337..8ca46f807b57a 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -239,6 +239,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 						"unchanged\n");
 				}
 				sym->is_declared = 1;
+				free_list(defn, NULL);
 				return sym;
 			} else if (!sym->is_declared) {
 				if (sym->is_override && flag_preserve) {
@@ -247,6 +248,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 					print_type_name(type, name);
 					fprintf(stderr, " modversion change\n");
 					sym->is_declared = 1;
+					free_list(defn, NULL);
 					return sym;
 				} else {
 					status = is_unknown_symbol(sym) ?
@@ -254,6 +256,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 				}
 			} else {
 				error_with_pos("redefinition of %s", name);
+				free_list(defn, NULL);
 				return sym;
 			}
 			break;
diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 8e9b5e69e8f01..840371d01bf48 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -152,14 +152,19 @@ simple_declaration:
 	;
 
 init_declarator_list_opt:
-	/* empty */				{ $$ = NULL; }
-	| init_declarator_list
+	/* empty */			{ $$ = NULL; }
+	| init_declarator_list		{ free_list(decl_spec, NULL); $$ = $1; }
 	;
 
 init_declarator_list:
 	init_declarator
 		{ struct string_list *decl = *$1;
 		  *$1 = NULL;
+
+		  /* avoid sharing among multiple init_declarators */
+		  if (decl_spec)
+		    decl_spec = copy_list_range(decl_spec, NULL);
+
 		  add_symbol(current_name,
 			     is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern);
 		  current_name = NULL;
@@ -170,6 +175,11 @@ init_declarator_list:
 		  *$3 = NULL;
 		  free_list(*$2, NULL);
 		  *$2 = decl_spec;
+
+		  /* avoid sharing among multiple init_declarators */
+		  if (decl_spec)
+		    decl_spec = copy_list_range(decl_spec, NULL);
+
 		  add_symbol(current_name,
 			     is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern);
 		  current_name = NULL;

From be2fa44b5180a1f021efb40c55fdf63c249c3209 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 3 Jan 2025 16:30:39 +0900
Subject: [PATCH 048/368] genksyms: fix memory leak when the same symbol is
 read from *.symref file

When a symbol that is already registered is read again from a *.symref
file, __add_symbol() removes the previous one from the hash table without
freeing it.

[Test Case]

  $ cat foo.c
  #include <linux/export.h>
  void foo(void);
  void foo(void) {}
  EXPORT_SYMBOL(foo);

  $ cat foo.symref
  foo void foo ( void )
  foo void foo ( void )

When a symbol is removed from the hash table, it must be freed along
with its ->name and ->defn members. However, sym->name cannot be freed
because it is sometimes shared with node->string, but not always. If
sym->name and node->string share the same memory, free(sym->name) could
lead to a double-free bug.

To resolve this issue, always assign a strdup'ed string to sym->name.

Fixes: 64e6c1e12372 ("genksyms: track symbol checksum changes")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/genksyms/genksyms.c | 8 ++++++--
 scripts/genksyms/genksyms.h | 2 +-
 scripts/genksyms/parse.y    | 4 ++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 8ca46f807b57a..c5e8e0e0f9490 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -272,11 +272,15 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 				break;
 			}
 		}
+
+		free_list(sym->defn, NULL);
+		free(sym->name);
+		free(sym);
 		--nsyms;
 	}
 
 	sym = xmalloc(sizeof(*sym));
-	sym->name = name;
+	sym->name = xstrdup(name);
 	sym->type = type;
 	sym->defn = defn;
 	sym->expansion_trail = NULL;
@@ -483,7 +487,7 @@ static void read_reference(FILE *f)
 			defn = def;
 			def = read_node(f);
 		}
-		subsym = add_reference_symbol(xstrdup(sym->string), sym->tag,
+		subsym = add_reference_symbol(sym->string, sym->tag,
 					      defn, is_extern);
 		subsym->is_override = is_override;
 		free_node(sym);
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 21ed2ec2d98ca..5621533dcb8e4 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -32,7 +32,7 @@ struct string_list {
 
 struct symbol {
 	struct symbol *hash_next;
-	const char *name;
+	char *name;
 	enum symbol_type type;
 	struct string_list *defn;
 	struct symbol *expansion_trail;
diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 840371d01bf48..689cb6bb40b65 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -482,12 +482,12 @@ enumerator_list:
 enumerator:
 	IDENT
 		{
-			const char *name = strdup((*$1)->string);
+			const char *name = (*$1)->string;
 			add_symbol(name, SYM_ENUM_CONST, NULL, 0);
 		}
 	| IDENT '=' EXPRESSION_PHRASE
 		{
-			const char *name = strdup((*$1)->string);
+			const char *name = (*$1)->string;
 			struct string_list *expr = copy_list_range(*$3, *$2);
 			add_symbol(name, SYM_ENUM_CONST, expr, 0);
 		}

From f034d186bf9e2857079815e5490e2810a1a287a6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 3 Jan 2025 16:30:40 +0900
Subject: [PATCH 049/368] genksyms: reduce the indentation in the for-loop in
 __add_symbol()

To improve readability, reduce the indentation as follows:

  - Use 'continue' earlier when the symbol does not match

  - Flip !sym->is_declared to flatten the if-else chain

No functional changes are intended.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/genksyms/genksyms.c | 63 ++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index c5e8e0e0f9490..5a90acd693f44 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -226,41 +226,38 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 
 	h = crc32(name) % HASH_BUCKETS;
 	for (sym = symtab[h]; sym; sym = sym->hash_next) {
-		if (map_to_ns(sym->type) == map_to_ns(type) &&
-		    strcmp(name, sym->name) == 0) {
-			if (is_reference)
-				/* fall through */ ;
-			else if (sym->type == type &&
-				 equal_list(sym->defn, defn)) {
-				if (!sym->is_declared && sym->is_override) {
-					print_location();
-					print_type_name(type, name);
-					fprintf(stderr, " modversion is "
-						"unchanged\n");
-				}
-				sym->is_declared = 1;
-				free_list(defn, NULL);
-				return sym;
-			} else if (!sym->is_declared) {
-				if (sym->is_override && flag_preserve) {
-					print_location();
-					fprintf(stderr, "ignoring ");
-					print_type_name(type, name);
-					fprintf(stderr, " modversion change\n");
-					sym->is_declared = 1;
-					free_list(defn, NULL);
-					return sym;
-				} else {
-					status = is_unknown_symbol(sym) ?
-						STATUS_DEFINED : STATUS_MODIFIED;
-				}
-			} else {
-				error_with_pos("redefinition of %s", name);
-				free_list(defn, NULL);
-				return sym;
+		if (map_to_ns(sym->type) != map_to_ns(type) ||
+		    strcmp(name, sym->name))
+			continue;
+
+		if (is_reference) {
+			/* fall through */ ;
+		} else if (sym->type == type && equal_list(sym->defn, defn)) {
+			if (!sym->is_declared && sym->is_override) {
+				print_location();
+				print_type_name(type, name);
+				fprintf(stderr, " modversion is unchanged\n");
 			}
-			break;
+			sym->is_declared = 1;
+			free_list(defn, NULL);
+			return sym;
+		} else if (sym->is_declared) {
+			error_with_pos("redefinition of %s", name);
+			free_list(defn, NULL);
+			return sym;
+		} else if (sym->is_override && flag_preserve) {
+			print_location();
+			fprintf(stderr, "ignoring ");
+			print_type_name(type, name);
+			fprintf(stderr, " modversion change\n");
+			sym->is_declared = 1;
+			free_list(defn, NULL);
+			return sym;
+		} else {
+			status = is_unknown_symbol(sym) ?
+					STATUS_DEFINED : STATUS_MODIFIED;
 		}
+		break;
 	}
 
 	if (sym) {

From 2480f53f21b21eb24a33815d4623f54fdb30cf27 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 3 Jan 2025 16:30:41 +0900
Subject: [PATCH 050/368] genksyms: refactor the return points in the for-loop
 in __add_symbol()

free_list() must be called before returning from this for-loop.

Swap 'break' and the combination of free_list() and 'return'.

This reduces the code and minimizes the risk of introducing memory
leaks in future changes.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/genksyms/genksyms.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 5a90acd693f44..41d6cfce0088f 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -231,7 +231,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 			continue;
 
 		if (is_reference) {
-			/* fall through */ ;
+			break;
 		} else if (sym->type == type && equal_list(sym->defn, defn)) {
 			if (!sym->is_declared && sym->is_override) {
 				print_location();
@@ -239,25 +239,21 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 				fprintf(stderr, " modversion is unchanged\n");
 			}
 			sym->is_declared = 1;
-			free_list(defn, NULL);
-			return sym;
 		} else if (sym->is_declared) {
 			error_with_pos("redefinition of %s", name);
-			free_list(defn, NULL);
-			return sym;
 		} else if (sym->is_override && flag_preserve) {
 			print_location();
 			fprintf(stderr, "ignoring ");
 			print_type_name(type, name);
 			fprintf(stderr, " modversion change\n");
 			sym->is_declared = 1;
-			free_list(defn, NULL);
-			return sym;
 		} else {
 			status = is_unknown_symbol(sym) ?
 					STATUS_DEFINED : STATUS_MODIFIED;
+			break;
 		}
-		break;
+		free_list(defn, NULL);
+		return sym;
 	}
 
 	if (sym) {

From 2759bd908f3cc8d286e1fa64ec7ee7f5d1124837 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 3 Jan 2025 16:30:42 +0900
Subject: [PATCH 051/368] genksyms: use generic macros for hash table
 implementation

Use the macros provided by hashtable.h.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/genksyms/genksyms.c | 32 ++++++++++++--------------------
 scripts/genksyms/genksyms.h |  4 +++-
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 41d6cfce0088f..e2cd3dcb469f7 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -18,12 +18,12 @@
 #include <stdarg.h>
 #include <getopt.h>
 
+#include <hashtable.h>
+
 #include "genksyms.h"
 /*----------------------------------------------------------------------*/
 
-#define HASH_BUCKETS  4096
-
-static struct symbol *symtab[HASH_BUCKETS];
+static HASHTABLE_DEFINE(symbol_hashtable, 1U << 12);
 static FILE *debugfile;
 
 int cur_line = 1;
@@ -151,14 +151,14 @@ static enum symbol_type map_to_ns(enum symbol_type t)
 
 struct symbol *find_symbol(const char *name, enum symbol_type ns, int exact)
 {
-	unsigned long h = crc32(name) % HASH_BUCKETS;
 	struct symbol *sym;
 
-	for (sym = symtab[h]; sym; sym = sym->hash_next)
+	hash_for_each_possible(symbol_hashtable, sym, hnode, crc32(name)) {
 		if (map_to_ns(sym->type) == map_to_ns(ns) &&
 		    strcmp(name, sym->name) == 0 &&
 		    sym->is_declared)
 			break;
+	}
 
 	if (exact && sym && sym->type != ns)
 		return NULL;
@@ -224,8 +224,8 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 			return NULL;
 	}
 
-	h = crc32(name) % HASH_BUCKETS;
-	for (sym = symtab[h]; sym; sym = sym->hash_next) {
+	h = crc32(name);
+	hash_for_each_possible(symbol_hashtable, sym, hnode, h) {
 		if (map_to_ns(sym->type) != map_to_ns(type) ||
 		    strcmp(name, sym->name))
 			continue;
@@ -257,14 +257,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 	}
 
 	if (sym) {
-		struct symbol **psym;
-
-		for (psym = &symtab[h]; *psym; psym = &(*psym)->hash_next) {
-			if (*psym == sym) {
-				*psym = sym->hash_next;
-				break;
-			}
-		}
+		hash_del(&sym->hnode);
 
 		free_list(sym->defn, NULL);
 		free(sym->name);
@@ -280,8 +273,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 	sym->visited = NULL;
 	sym->is_extern = is_extern;
 
-	sym->hash_next = symtab[h];
-	symtab[h] = sym;
+	hash_add(symbol_hashtable, &sym->hnode, h);
 
 	sym->is_declared = !is_reference;
 	sym->status = status;
@@ -832,9 +824,9 @@ int main(int argc, char **argv)
 	}
 
 	if (flag_debug) {
-		fprintf(debugfile, "Hash table occupancy %d/%d = %g\n",
-			nsyms, HASH_BUCKETS,
-			(double)nsyms / (double)HASH_BUCKETS);
+		fprintf(debugfile, "Hash table occupancy %d/%zd = %g\n",
+			nsyms, HASH_SIZE(symbol_hashtable),
+			(double)nsyms / HASH_SIZE(symbol_hashtable));
 	}
 
 	if (dumpfile)
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 5621533dcb8e4..8c45ada59ece5 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -14,6 +14,8 @@
 
 #include <stdio.h>
 
+#include <list_types.h>
+
 enum symbol_type {
 	SYM_NORMAL, SYM_TYPEDEF, SYM_ENUM, SYM_STRUCT, SYM_UNION,
 	SYM_ENUM_CONST
@@ -31,7 +33,7 @@ struct string_list {
 };
 
 struct symbol {
-	struct symbol *hash_next;
+	struct hlist_node hnode;
 	char *name;
 	enum symbol_type type;
 	struct string_list *defn;

From a56fece7f302ff1eb49535e66bdd5d03ced0ca20 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 3 Jan 2025 16:30:43 +0900
Subject: [PATCH 052/368] genksyms: use uint32_t instead of unsigned long for
 calculating CRC

Currently, 'unsigned long' is used for intermediate variables when
calculating CRCs.

The size of 'long' differs depending on the architecture: it is 32 bits
on 32-bit architectures and 64 bits on 64-bit architectures.

The CRC values generated by genksyms represent the compatibility of
exported symbols. Therefore, reproducibility is important. In other
words, we need to ensure that the output is the same when the kernel
source is identical, regardless of whether genksyms is running on a
32-bit or 64-bit build machine.

Fortunately, the output from genksyms is not affected by the build
machine's architecture because only the lower 32 bits of the
'unsigned long' variables are used.

To make it even clearer that the CRC calculation is independent of
the build machine's architecture, this commit explicitly uses the
fixed-width type, uint32_t.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/genksyms/genksyms.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index e2cd3dcb469f7..8b0d7ac73dbb0 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -12,6 +12,7 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <assert.h>
@@ -60,7 +61,7 @@ static void print_type_name(enum symbol_type type, const char *name);
 
 /*----------------------------------------------------------------------*/
 
-static const unsigned int crctab32[] = {
+static const uint32_t crctab32[] = {
 	0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
 	0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
 	0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
@@ -115,19 +116,19 @@ static const unsigned int crctab32[] = {
 	0x2d02ef8dU
 };
 
-static unsigned long partial_crc32_one(unsigned char c, unsigned long crc)
+static uint32_t partial_crc32_one(uint8_t c, uint32_t crc)
 {
 	return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
 }
 
-static unsigned long partial_crc32(const char *s, unsigned long crc)
+static uint32_t partial_crc32(const char *s, uint32_t crc)
 {
 	while (*s)
 		crc = partial_crc32_one(*s++, crc);
 	return crc;
 }
 
-static unsigned long crc32(const char *s)
+static uint32_t crc32(const char *s)
 {
 	return partial_crc32(s, 0xffffffff) ^ 0xffffffff;
 }
@@ -517,7 +518,7 @@ static void print_list(FILE * f, struct string_list *list)
 	}
 }
 
-static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc)
+static uint32_t expand_and_crc_sym(struct symbol *sym, uint32_t crc)
 {
 	struct string_list *list = sym->defn;
 	struct string_list **e, **b;
@@ -624,7 +625,7 @@ static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc)
 void export_symbol(const char *name)
 {
 	struct symbol *sym;
-	unsigned long crc;
+	uint32_t crc;
 	int has_changed = 0;
 
 	sym = find_symbol(name, SYM_NORMAL, 0);
@@ -672,7 +673,7 @@ void export_symbol(const char *name)
 	if (flag_dump_defs)
 		fputs(">\n", debugfile);
 
-	printf("#SYMVER %s 0x%08lx\n", name, crc);
+	printf("#SYMVER %s 0x%08lx\n", name, (unsigned long)crc);
 }
 
 /*----------------------------------------------------------------------*/

From f28568841ae0a0dd48dfc5400aaebedf10a54d10 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:23 +0000
Subject: [PATCH 053/368] tools: Add gendwarfksyms

Add a basic DWARF parser, which uses libdw to traverse the debugging
information in an object file and looks for functions and variables.
In follow-up patches, this will be expanded to produce symbol versions
for CONFIG_MODVERSIONS from DWARF.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 MAINTAINERS                           |   7 ++
 kernel/module/Kconfig                 |   8 ++
 scripts/Makefile                      |   1 +
 scripts/gendwarfksyms/.gitignore      |   2 +
 scripts/gendwarfksyms/Makefile        |   8 ++
 scripts/gendwarfksyms/dwarf.c         | 166 ++++++++++++++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.c | 128 ++++++++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.h |  95 +++++++++++++++
 scripts/gendwarfksyms/symbols.c       |  98 +++++++++++++++
 9 files changed, 513 insertions(+)
 create mode 100644 scripts/gendwarfksyms/.gitignore
 create mode 100644 scripts/gendwarfksyms/Makefile
 create mode 100644 scripts/gendwarfksyms/dwarf.c
 create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
 create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
 create mode 100644 scripts/gendwarfksyms/symbols.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 30cbc3d44cd53..1ec4753b14578 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9550,6 +9550,13 @@ W:	https://linuxtv.org
 T:	git git://linuxtv.org/media.git
 F:	drivers/media/radio/radio-gemtek*
 
+GENDWARFKSYMS
+M:	Sami Tolvanen <samitolvanen@google.com>
+L:	linux-modules@vger.kernel.org
+L:	linux-kbuild@vger.kernel.org
+S:	Maintained
+F:	scripts/gendwarfksyms/
+
 GENERIC ARCHITECTURE TOPOLOGY
 M:	Sudeep Holla <sudeep.holla@arm.com>
 L:	linux-kernel@vger.kernel.org
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 7b329057997ad..4637f063d0fcb 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -169,6 +169,14 @@ config MODVERSIONS
 	  make them incompatible with the kernel you are running.  If
 	  unsure, say N.
 
+config GENDWARFKSYMS
+	bool "gendwarfksyms (from debugging information)"
+	depends on DEBUG_INFO
+	# Requires full debugging information, split DWARF not supported.
+	depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
+	# Requires ELF object files.
+	depends on !LTO
+
 config ASM_MODVERSIONS
 	bool
 	default HAVE_ASM_MODVERSIONS && MODVERSIONS
diff --git a/scripts/Makefile b/scripts/Makefile
index 6bcda4b9d0540..d7fec46d38c00 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -54,6 +54,7 @@ targets += module.lds
 
 subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
 subdir-$(CONFIG_MODVERSIONS) += genksyms
+subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
 subdir-$(CONFIG_SECURITY_SELINUX) += selinux
 subdir-$(CONFIG_SECURITY_IPE) += ipe
 
diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
new file mode 100644
index 0000000000000..0927f8d3cd96c
--- /dev/null
+++ b/scripts/gendwarfksyms/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+/gendwarfksyms
diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
new file mode 100644
index 0000000000000..9f8fec4fd39b9
--- /dev/null
+++ b/scripts/gendwarfksyms/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+hostprogs-always-y += gendwarfksyms
+
+gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += dwarf.o
+gendwarfksyms-objs += symbols.o
+
+HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
new file mode 100644
index 0000000000000..81df3e2ad3aed
--- /dev/null
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
+{
+	Dwarf_Attribute da;
+
+	/* dwarf_formref_die returns a pointer instead of an error value. */
+	return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
+}
+
+#define DEFINE_GET_STRING_ATTR(attr)                         \
+	static const char *get_##attr##_attr(Dwarf_Die *die) \
+	{                                                    \
+		Dwarf_Attribute da;                          \
+		if (dwarf_attr(die, DW_AT_##attr, &da))      \
+			return dwarf_formstring(&da);        \
+		return NULL;                                 \
+	}
+
+DEFINE_GET_STRING_ATTR(name)
+DEFINE_GET_STRING_ATTR(linkage_name)
+
+static const char *get_symbol_name(Dwarf_Die *die)
+{
+	const char *name;
+
+	/* rustc uses DW_AT_linkage_name for exported symbols */
+	name = get_linkage_name_attr(die);
+	if (!name)
+		name = get_name_attr(die);
+
+	return name;
+}
+
+static bool match_export_symbol(struct state *state, Dwarf_Die *die)
+{
+	Dwarf_Die *source = die;
+	Dwarf_Die origin;
+
+	/* If the DIE has an abstract origin, use it for type information. */
+	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
+		source = &origin;
+
+	state->sym = symbol_get(get_symbol_name(die));
+
+	/* Look up using the origin name if there are no matches. */
+	if (!state->sym && source != die)
+		state->sym = symbol_get(get_symbol_name(source));
+
+	state->die = *source;
+	return !!state->sym;
+}
+
+/*
+ * Type string processing
+ */
+static void process(const char *s)
+{
+	s = s ?: "<null>";
+
+	if (dump_dies)
+		fputs(s, stderr);
+}
+
+bool match_all(Dwarf_Die *die)
+{
+	return true;
+}
+
+int process_die_container(struct state *state, Dwarf_Die *die,
+			  die_callback_t func, die_match_callback_t match)
+{
+	Dwarf_Die current;
+	int res;
+
+	res = checkp(dwarf_child(die, &current));
+	while (!res) {
+		if (match(&current)) {
+			/* <0 = error, 0 = continue, >0 = stop */
+			res = checkp(func(state, &current));
+			if (res)
+				return res;
+		}
+
+		res = checkp(dwarf_siblingof(&current, &current));
+	}
+
+	return 0;
+}
+
+/*
+ * Exported symbol processing
+ */
+static void process_symbol(struct state *state, Dwarf_Die *die,
+			   die_callback_t process_func)
+{
+	debug("%s", state->sym->name);
+	check(process_func(state, die));
+	if (dump_dies)
+		fputs("\n", stderr);
+}
+
+static int __process_subprogram(struct state *state, Dwarf_Die *die)
+{
+	process("subprogram");
+	return 0;
+}
+
+static void process_subprogram(struct state *state, Dwarf_Die *die)
+{
+	process_symbol(state, die, __process_subprogram);
+}
+
+static int __process_variable(struct state *state, Dwarf_Die *die)
+{
+	process("variable ");
+	return 0;
+}
+
+static void process_variable(struct state *state, Dwarf_Die *die)
+{
+	process_symbol(state, die, __process_variable);
+}
+
+static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
+{
+	int tag = dwarf_tag(die);
+
+	switch (tag) {
+	/* Possible containers of exported symbols */
+	case DW_TAG_namespace:
+	case DW_TAG_class_type:
+	case DW_TAG_structure_type:
+		return check(process_die_container(
+			NULL, die, process_exported_symbols, match_all));
+
+	/* Possible exported symbols */
+	case DW_TAG_subprogram:
+	case DW_TAG_variable: {
+		struct state state;
+
+		if (!match_export_symbol(&state, die))
+			return 0;
+
+		if (tag == DW_TAG_subprogram)
+			process_subprogram(&state, &state.die);
+		else
+			process_variable(&state, &state.die);
+
+		return 0;
+	}
+	default:
+		return 0;
+	}
+}
+
+void process_cu(Dwarf_Die *cudie)
+{
+	check(process_die_container(NULL, cudie, process_exported_symbols,
+				    match_all));
+}
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
new file mode 100644
index 0000000000000..a1d13353c6bc7
--- /dev/null
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include "gendwarfksyms.h"
+
+/*
+ * Options
+ */
+
+/* Print debugging information to stderr */
+int debug;
+/* Dump DIE contents */
+int dump_dies;
+
+static void usage(void)
+{
+	fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n"
+	      "Options:\n"
+	      "  -d, --debug          Print debugging information\n"
+	      "      --dump-dies      Dump DWARF DIE contents\n"
+	      "  -h, --help           Print this message\n"
+	      "\n",
+	      stderr);
+}
+
+static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
+			  Dwarf_Addr base, void *arg)
+{
+	Dwarf_Addr dwbias;
+	Dwarf_Die cudie;
+	Dwarf_CU *cu = NULL;
+	Dwarf *dbg;
+	int res;
+
+	debug("%s", name);
+	dbg = dwfl_module_getdwarf(mod, &dwbias);
+
+	do {
+		res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
+		if (res < 0)
+			error("dwarf_get_units failed: no debugging information?");
+		if (res == 1)
+			break; /* No more units */
+
+		process_cu(&cudie);
+	} while (cu);
+
+	return DWARF_CB_OK;
+}
+
+static const Dwfl_Callbacks callbacks = {
+	.section_address = dwfl_offline_section_address,
+	.find_debuginfo = dwfl_standard_find_debuginfo,
+};
+
+int main(int argc, char **argv)
+{
+	unsigned int n;
+	int opt;
+
+	static const struct option opts[] = {
+		{ "debug", 0, NULL, 'd' },
+		{ "dump-dies", 0, &dump_dies, 1 },
+		{ "help", 0, NULL, 'h' },
+		{ 0, 0, NULL, 0 }
+	};
+
+	while ((opt = getopt_long(argc, argv, "dh", opts, NULL)) != EOF) {
+		switch (opt) {
+		case 0:
+			break;
+		case 'd':
+			debug = 1;
+			break;
+		case 'h':
+			usage();
+			return 0;
+		default:
+			usage();
+			return 1;
+		}
+	}
+
+	if (optind >= argc) {
+		usage();
+		error("no input files?");
+	}
+
+	symbol_read_exports(stdin);
+
+	for (n = optind; n < argc; n++) {
+		Dwfl *dwfl;
+		int fd;
+
+		fd = open(argv[n], O_RDONLY);
+		if (fd == -1)
+			error("open failed for '%s': %s", argv[n],
+			      strerror(errno));
+
+		dwfl = dwfl_begin(&callbacks);
+		if (!dwfl)
+			error("dwfl_begin failed for '%s': %s", argv[n],
+			      dwarf_errmsg(-1));
+
+		if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd))
+			error("dwfl_report_offline failed for '%s': %s",
+			      argv[n], dwarf_errmsg(-1));
+
+		dwfl_report_end(dwfl, NULL, NULL);
+
+		if (dwfl_getmodules(dwfl, &process_module, NULL, 0))
+			error("dwfl_getmodules failed for '%s'", argv[n]);
+
+		dwfl_end(dwfl);
+	}
+
+	symbol_free();
+
+	return 0;
+}
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
new file mode 100644
index 0000000000000..5c8288c71fddb
--- /dev/null
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <hash.h>
+#include <hashtable.h>
+#include <xalloc.h>
+
+#ifndef __GENDWARFKSYMS_H
+#define __GENDWARFKSYMS_H
+
+/*
+ * Options -- in gendwarfksyms.c
+ */
+extern int debug;
+extern int dump_dies;
+
+/*
+ * Output helpers
+ */
+#define __PREFIX "gendwarfksyms: "
+#define __println(prefix, format, ...)                                \
+	fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \
+		##__VA_ARGS__)
+
+#define debug(format, ...)                                    \
+	do {                                                  \
+		if (debug)                                    \
+			__println("", format, ##__VA_ARGS__); \
+	} while (0)
+
+#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__)
+#define error(format, ...)                                   \
+	do {                                                 \
+		__println("error: ", format, ##__VA_ARGS__); \
+		exit(1);                                     \
+	} while (0)
+
+/*
+ * Error handling helpers
+ */
+#define __check(expr, test)                                     \
+	({                                                      \
+		int __res = expr;                               \
+		if (test)                                       \
+			error("`%s` failed: %d", #expr, __res); \
+		__res;                                          \
+	})
+
+/* Error == non-zero values */
+#define check(expr) __check(expr, __res)
+/* Error == negative values */
+#define checkp(expr) __check(expr, __res < 0)
+
+/*
+ * symbols.c
+ */
+
+struct symbol {
+	const char *name;
+	struct hlist_node name_hash;
+};
+
+typedef void (*symbol_callback_t)(struct symbol *, void *arg);
+
+void symbol_read_exports(FILE *file);
+struct symbol *symbol_get(const char *name);
+void symbol_free(void);
+
+/*
+ * dwarf.c
+ */
+
+struct state {
+	struct symbol *sym;
+	Dwarf_Die die;
+};
+
+typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
+typedef bool (*die_match_callback_t)(Dwarf_Die *die);
+bool match_all(Dwarf_Die *die);
+
+int process_die_container(struct state *state, Dwarf_Die *die,
+			  die_callback_t func, die_match_callback_t match);
+
+void process_cu(Dwarf_Die *cudie);
+
+#endif /* __GENDWARFKSYMS_H */
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
new file mode 100644
index 0000000000000..592eacf726948
--- /dev/null
+++ b/scripts/gendwarfksyms/symbols.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+#define SYMBOL_HASH_BITS 12
+
+/* name -> struct symbol */
+static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
+
+static unsigned int for_each(const char *name, symbol_callback_t func,
+			     void *data)
+{
+	struct hlist_node *tmp;
+	struct symbol *match;
+
+	if (!name || !*name)
+		return 0;
+
+	hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
+				    hash_str(name)) {
+		if (strcmp(match->name, name))
+			continue;
+
+		if (func)
+			func(match, data);
+
+		return 1;
+	}
+
+	return 0;
+}
+
+static bool is_exported(const char *name)
+{
+	return for_each(name, NULL, NULL) > 0;
+}
+
+void symbol_read_exports(FILE *file)
+{
+	struct symbol *sym;
+	char *line = NULL;
+	char *name = NULL;
+	size_t size = 0;
+	int nsym = 0;
+
+	while (getline(&line, &size, file) > 0) {
+		if (sscanf(line, "%ms\n", &name) != 1)
+			error("malformed input line: %s", line);
+
+		if (is_exported(name)) {
+			/* Ignore duplicates */
+			free(name);
+			continue;
+		}
+
+		sym = xcalloc(1, sizeof(struct symbol));
+		sym->name = name;
+
+		hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
+		++nsym;
+
+		debug("%s", sym->name);
+	}
+
+	free(line);
+	debug("%d exported symbols", nsym);
+}
+
+static void get_symbol(struct symbol *sym, void *arg)
+{
+	struct symbol **res = arg;
+
+	*res = sym;
+}
+
+struct symbol *symbol_get(const char *name)
+{
+	struct symbol *sym = NULL;
+
+	for_each(name, get_symbol, &sym);
+	return sym;
+}
+
+void symbol_free(void)
+{
+	struct hlist_node *tmp;
+	struct symbol *sym;
+
+	hash_for_each_safe(symbol_names, sym, tmp, name_hash) {
+		free((void *)sym->name);
+		free(sym);
+	}
+
+	hash_init(symbol_names);
+}

From e982abf43749529687dd1d07fa4f495902910cf2 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:24 +0000
Subject: [PATCH 054/368] gendwarfksyms: Add address matching

The compiler may choose not to emit type information in DWARF for all
aliases, but it's possible for each alias to be exported separately.
To ensure we find type information for the aliases as well, read
{section, address} tuples from the symbol table and match symbols also
by address.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/gendwarfksyms.c |   2 +
 scripts/gendwarfksyms/gendwarfksyms.h |  13 +++
 scripts/gendwarfksyms/symbols.c       | 161 ++++++++++++++++++++++++++
 3 files changed, 176 insertions(+)

diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index a1d13353c6bc7..cd8bfe973a5cb 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -105,6 +105,8 @@ int main(int argc, char **argv)
 			error("open failed for '%s': %s", argv[n],
 			      strerror(errno));
 
+		symbol_read_symtab(fd);
+
 		dwfl = dwfl_begin(&callbacks);
 		if (!dwfl)
 			error("dwfl_begin failed for '%s': %s", argv[n],
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 5c8288c71fddb..cb9fd78a58dac 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -63,14 +63,27 @@ extern int dump_dies;
  * symbols.c
  */
 
+static inline unsigned int addr_hash(uintptr_t addr)
+{
+	return hash_ptr((const void *)addr);
+}
+
+struct symbol_addr {
+	uint32_t section;
+	Elf64_Addr address;
+};
+
 struct symbol {
 	const char *name;
+	struct symbol_addr addr;
+	struct hlist_node addr_hash;
 	struct hlist_node name_hash;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
 
 void symbol_read_exports(FILE *file);
+void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
 void symbol_free(void);
 
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index 592eacf726948..98febb524dd57 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -7,9 +7,38 @@
 
 #define SYMBOL_HASH_BITS 12
 
+/* struct symbol_addr -> struct symbol */
+static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS);
 /* name -> struct symbol */
 static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
 
+static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr)
+{
+	return hash_32(addr->section ^ addr_hash(addr->address));
+}
+
+static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func,
+				    void *data)
+{
+	struct hlist_node *tmp;
+	struct symbol *match = NULL;
+	unsigned int processed = 0;
+
+	hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
+				    symbol_addr_hash(&sym->addr)) {
+		if (match == sym)
+			continue; /* Already processed */
+
+		if (match->addr.section == sym->addr.section &&
+		    match->addr.address == sym->addr.address) {
+			func(match, data);
+			++processed;
+		}
+	}
+
+	return processed;
+}
+
 static unsigned int for_each(const char *name, symbol_callback_t func,
 			     void *data)
 {
@@ -24,9 +53,13 @@ static unsigned int for_each(const char *name, symbol_callback_t func,
 		if (strcmp(match->name, name))
 			continue;
 
+		/* Call func for the match, and all address matches */
 		if (func)
 			func(match, data);
 
+		if (match->addr.section != SHN_UNDEF)
+			return __for_each_addr(match, func, data) + 1;
+
 		return 1;
 	}
 
@@ -58,6 +91,7 @@ void symbol_read_exports(FILE *file)
 
 		sym = xcalloc(1, sizeof(struct symbol));
 		sym->name = name;
+		sym->addr.section = SHN_UNDEF;
 
 		hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
 		++nsym;
@@ -84,6 +118,132 @@ struct symbol *symbol_get(const char *name)
 	return sym;
 }
 
+typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
+				      Elf32_Word xndx, void *arg);
+
+static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg)
+{
+	size_t sym_size;
+	GElf_Shdr shdr_mem;
+	GElf_Shdr *shdr;
+	Elf_Data *xndx_data = NULL;
+	Elf_Scn *scn;
+	Elf *elf;
+
+	if (elf_version(EV_CURRENT) != EV_CURRENT)
+		error("elf_version failed: %s", elf_errmsg(-1));
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (!elf)
+		error("elf_begin failed: %s", elf_errmsg(-1));
+
+	scn = elf_nextscn(elf, NULL);
+
+	while (scn) {
+		shdr = gelf_getshdr(scn, &shdr_mem);
+		if (!shdr)
+			error("gelf_getshdr failed: %s", elf_errmsg(-1));
+
+		if (shdr->sh_type == SHT_SYMTAB_SHNDX) {
+			xndx_data = elf_getdata(scn, NULL);
+			if (!xndx_data)
+				error("elf_getdata failed: %s", elf_errmsg(-1));
+			break;
+		}
+
+		scn = elf_nextscn(elf, scn);
+	}
+
+	sym_size = gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT);
+	scn = elf_nextscn(elf, NULL);
+
+	while (scn) {
+		shdr = gelf_getshdr(scn, &shdr_mem);
+		if (!shdr)
+			error("gelf_getshdr failed: %s", elf_errmsg(-1));
+
+		if (shdr->sh_type == SHT_SYMTAB) {
+			unsigned int nsyms;
+			unsigned int n;
+			Elf_Data *data = elf_getdata(scn, NULL);
+
+			if (!data)
+				error("elf_getdata failed: %s", elf_errmsg(-1));
+
+			if (shdr->sh_entsize != sym_size)
+				error("expected sh_entsize (%llu) to be %zu",
+				      (unsigned long long)shdr->sh_entsize, sym_size);
+
+			nsyms = shdr->sh_size / shdr->sh_entsize;
+
+			for (n = 1; n < nsyms; ++n) {
+				const char *name = NULL;
+				Elf32_Word xndx = 0;
+				GElf_Sym sym_mem;
+				GElf_Sym *sym;
+
+				sym = gelf_getsymshndx(data, xndx_data, n,
+						       &sym_mem, &xndx);
+				if (!sym)
+					error("gelf_getsymshndx failed: %s",
+					      elf_errmsg(-1));
+
+				if (GELF_ST_BIND(sym->st_info) == STB_LOCAL)
+					continue;
+
+				if (sym->st_shndx != SHN_XINDEX)
+					xndx = sym->st_shndx;
+
+				name = elf_strptr(elf, shdr->sh_link,
+						  sym->st_name);
+				if (!name)
+					error("elf_strptr failed: %s",
+					      elf_errmsg(-1));
+
+				/* Skip empty symbol names */
+				if (*name)
+					func(name, sym, xndx, arg);
+			}
+		}
+
+		scn = elf_nextscn(elf, scn);
+	}
+
+	check(elf_end(elf));
+}
+
+static void set_symbol_addr(struct symbol *sym, void *arg)
+{
+	struct symbol_addr *addr = arg;
+
+	if (sym->addr.section == SHN_UNDEF) {
+		sym->addr = *addr;
+		hash_add(symbol_addrs, &sym->addr_hash,
+			 symbol_addr_hash(&sym->addr));
+
+		debug("%s -> { %u, %llx }", sym->name, sym->addr.section,
+		      (unsigned long long)sym->addr.address);
+	} else if (sym->addr.section != addr->section ||
+		   sym->addr.address != addr->address) {
+		warn("multiple addresses for symbol %s?", sym->name);
+	}
+}
+
+static void elf_set_symbol_addr(const char *name, GElf_Sym *sym,
+				Elf32_Word xndx, void *arg)
+{
+	struct symbol_addr addr = { .section = xndx, .address = sym->st_value };
+
+	/* Set addresses for exported symbols */
+	if (addr.section != SHN_UNDEF)
+		for_each(name, set_symbol_addr, &addr);
+}
+
+void symbol_read_symtab(int fd)
+{
+	elf_for_each_global(fd, elf_set_symbol_addr, NULL);
+}
+
 void symbol_free(void)
 {
 	struct hlist_node *tmp;
@@ -94,5 +254,6 @@ void symbol_free(void)
 		free(sym);
 	}
 
+	hash_init(symbol_addrs);
 	hash_init(symbol_names);
 }

From 5b7780e86857f70249df2c4f8982cad3ba931eee Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:25 +0000
Subject: [PATCH 055/368] gendwarfksyms: Expand base_type

Start making gendwarfksyms more useful by adding support for
expanding DW_TAG_base_type types and basic DWARF attributes.

Example:

  $ echo loops_per_jiffy | \
      scripts/gendwarfksyms/gendwarfksyms \
        --debug --dump-dies vmlinux.o
  ...
  gendwarfksyms: process_symbol: loops_per_jiffy
  variable base_type unsigned long byte_size(8) encoding(7)
  ...

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c | 160 ++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 81df3e2ad3aed..74e75b8ec8913 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -3,8 +3,21 @@
  * Copyright (C) 2024 Google LLC
  */
 
+#include <inttypes.h>
+#include <stdarg.h>
 #include "gendwarfksyms.h"
 
+#define DEFINE_GET_ATTR(attr, type)                                    \
+	static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \
+				      type *value)                     \
+	{                                                              \
+		Dwarf_Attribute da;                                    \
+		return dwarf_attr(die, id, &da) &&                     \
+		       !dwarf_form##attr(&da, value);                  \
+	}
+
+DEFINE_GET_ATTR(udata, Dwarf_Word)
+
 static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
 {
 	Dwarf_Attribute da;
@@ -67,6 +80,109 @@ static void process(const char *s)
 		fputs(s, stderr);
 }
 
+#define MAX_FMT_BUFFER_SIZE 128
+
+static void process_fmt(const char *fmt, ...)
+{
+	char buf[MAX_FMT_BUFFER_SIZE];
+	va_list args;
+
+	va_start(args, fmt);
+
+	if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf))
+		error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE");
+
+	process(buf);
+	va_end(args);
+}
+
+#define MAX_FQN_SIZE 64
+
+/* Get a fully qualified name from DWARF scopes */
+static char *get_fqn(Dwarf_Die *die)
+{
+	const char *list[MAX_FQN_SIZE];
+	Dwarf_Die *scopes = NULL;
+	bool has_name = false;
+	char *fqn = NULL;
+	char *p;
+	int count = 0;
+	int len = 0;
+	int res;
+	int i;
+
+	res = checkp(dwarf_getscopes_die(die, &scopes));
+	if (!res) {
+		list[count] = get_name_attr(die);
+
+		if (!list[count])
+			return NULL;
+
+		len += strlen(list[count]);
+		count++;
+
+		goto done;
+	}
+
+	for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) {
+		if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
+			continue;
+
+		list[count] = get_name_attr(&scopes[i]);
+
+		if (list[count]) {
+			has_name = true;
+		} else {
+			list[count] = "<anonymous>";
+			has_name = false;
+		}
+
+		len += strlen(list[count]);
+		count++;
+
+		if (i > 0) {
+			list[count++] = "::";
+			len += 2;
+		}
+	}
+
+	free(scopes);
+
+	if (count == MAX_FQN_SIZE)
+		warn("increase MAX_FQN_SIZE: reached the maximum");
+
+	/* Consider the DIE unnamed if the last scope doesn't have a name */
+	if (!has_name)
+		return NULL;
+done:
+	fqn = xmalloc(len + 1);
+	*fqn = '\0';
+
+	p = fqn;
+	for (i = 0; i < count; i++)
+		p = stpcpy(p, list[i]);
+
+	return fqn;
+}
+
+static void process_fqn(Dwarf_Die *die)
+{
+	process(" ");
+	process(get_fqn(die) ?: "");
+}
+
+#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute)                           \
+	static void process_##attribute##_attr(Dwarf_Die *die)              \
+	{                                                                   \
+		Dwarf_Word value;                                           \
+		if (get_udata_attr(die, DW_AT_##attribute, &value))         \
+			process_fmt(" " #attribute "(%" PRIu64 ")", value); \
+	}
+
+DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
+
 bool match_all(Dwarf_Die *die)
 {
 	return true;
@@ -93,6 +209,49 @@ int process_die_container(struct state *state, Dwarf_Die *die,
 	return 0;
 }
 
+static int process_type(struct state *state, Dwarf_Die *die);
+
+static void process_type_attr(struct state *state, Dwarf_Die *die)
+{
+	Dwarf_Die type;
+
+	if (get_ref_die_attr(die, DW_AT_type, &type)) {
+		check(process_type(state, &type));
+		return;
+	}
+
+	/* Compilers can omit DW_AT_type -- print out 'void' to clarify */
+	process("base_type void");
+}
+
+static void process_base_type(struct state *state, Dwarf_Die *die)
+{
+	process("base_type");
+	process_fqn(die);
+	process_byte_size_attr(die);
+	process_encoding_attr(die);
+	process_alignment_attr(die);
+}
+
+#define PROCESS_TYPE(type)                         \
+	case DW_TAG_##type##_type:                 \
+		process_##type##_type(state, die); \
+		break;
+
+static int process_type(struct state *state, Dwarf_Die *die)
+{
+	int tag = dwarf_tag(die);
+
+	switch (tag) {
+	PROCESS_TYPE(base)
+	default:
+		debug("unimplemented type: %x", tag);
+		break;
+	}
+
+	return 0;
+}
+
 /*
  * Exported symbol processing
  */
@@ -119,6 +278,7 @@ static void process_subprogram(struct state *state, Dwarf_Die *die)
 static int __process_variable(struct state *state, Dwarf_Die *die)
 {
 	process("variable ");
+	process_type_attr(state, die);
 	return 0;
 }
 

From 0c1c76274e88c420779c3aea077f9812bd16edaa Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:26 +0000
Subject: [PATCH 056/368] gendwarfksyms: Add a cache for processed DIEs

Basic types in DWARF repeat frequently and traversing the DIEs using
libdw is relatively slow. Add a simple hashtable based cache for the
processed DIEs.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/Makefile        |   1 +
 scripts/gendwarfksyms/die.c           | 143 ++++++++++++++++++++++++++
 scripts/gendwarfksyms/dwarf.c         | 136 +++++++++++++++++-------
 scripts/gendwarfksyms/gendwarfksyms.c |   6 ++
 scripts/gendwarfksyms/gendwarfksyms.h |  63 +++++++++++-
 5 files changed, 308 insertions(+), 41 deletions(-)
 create mode 100644 scripts/gendwarfksyms/die.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index 9f8fec4fd39b9..c0d4ce50fc27e 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -2,6 +2,7 @@
 hostprogs-always-y += gendwarfksyms
 
 gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
 
diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
new file mode 100644
index 0000000000000..b7d900c6a9c89
--- /dev/null
+++ b/scripts/gendwarfksyms/die.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <string.h>
+#include "gendwarfksyms.h"
+
+#define DIE_HASH_BITS 15
+
+/* {die->addr, state} -> struct die * */
+static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
+
+static unsigned int map_hits;
+static unsigned int map_misses;
+
+static inline unsigned int die_hash(uintptr_t addr, enum die_state state)
+{
+	return hash_32(addr_hash(addr) ^ (unsigned int)state);
+}
+
+static void init_die(struct die *cd)
+{
+	cd->state = DIE_INCOMPLETE;
+	cd->fqn = NULL;
+	cd->tag = -1;
+	cd->addr = 0;
+	INIT_LIST_HEAD(&cd->fragments);
+}
+
+static struct die *create_die(Dwarf_Die *die, enum die_state state)
+{
+	struct die *cd;
+
+	cd = xmalloc(sizeof(struct die));
+	init_die(cd);
+	cd->addr = (uintptr_t)die->addr;
+
+	hash_add(die_map, &cd->hash, die_hash(cd->addr, state));
+	return cd;
+}
+
+int __die_map_get(uintptr_t addr, enum die_state state, struct die **res)
+{
+	struct die *cd;
+
+	hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) {
+		if (cd->addr == addr && cd->state == state) {
+			*res = cd;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+struct die *die_map_get(Dwarf_Die *die, enum die_state state)
+{
+	struct die *cd;
+
+	if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) {
+		map_hits++;
+		return cd;
+	}
+
+	map_misses++;
+	return create_die(die, state);
+}
+
+static void reset_die(struct die *cd)
+{
+	struct die_fragment *tmp;
+	struct die_fragment *df;
+
+	list_for_each_entry_safe(df, tmp, &cd->fragments, list) {
+		if (df->type == FRAGMENT_STRING)
+			free(df->data.str);
+		free(df);
+	}
+
+	if (cd->fqn && *cd->fqn)
+		free(cd->fqn);
+	init_die(cd);
+}
+
+void die_map_free(void)
+{
+	struct hlist_node *tmp;
+	unsigned int stats[DIE_LAST + 1];
+	struct die *cd;
+	int i;
+
+	memset(stats, 0, sizeof(stats));
+
+	hash_for_each_safe(die_map, cd, tmp, hash) {
+		stats[cd->state]++;
+		reset_die(cd);
+		free(cd);
+	}
+	hash_init(die_map);
+
+	if (map_hits + map_misses > 0)
+		debug("hits %u, misses %u (hit rate %.02f%%)", map_hits,
+		      map_misses,
+		      (100.0f * map_hits) / (map_hits + map_misses));
+
+	for (i = 0; i <= DIE_LAST; i++)
+		debug("%s: %u entries", die_state_name(i), stats[i]);
+}
+
+static struct die_fragment *append_item(struct die *cd)
+{
+	struct die_fragment *df;
+
+	df = xmalloc(sizeof(struct die_fragment));
+	df->type = FRAGMENT_EMPTY;
+	list_add_tail(&df->list, &cd->fragments);
+	return df;
+}
+
+void die_map_add_string(struct die *cd, const char *str)
+{
+	struct die_fragment *df;
+
+	if (!cd)
+		return;
+
+	df = append_item(cd);
+	df->data.str = xstrdup(str);
+	df->type = FRAGMENT_STRING;
+}
+
+void die_map_add_die(struct die *cd, struct die *child)
+{
+	struct die_fragment *df;
+
+	if (!cd)
+		return;
+
+	df = append_item(cd);
+	df->data.addr = child->addr;
+	df->type = FRAGMENT_DIE;
+}
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 74e75b8ec8913..f40e23a547dae 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -72,17 +72,19 @@ static bool match_export_symbol(struct state *state, Dwarf_Die *die)
 /*
  * Type string processing
  */
-static void process(const char *s)
+static void process(struct die *cache, const char *s)
 {
 	s = s ?: "<null>";
 
 	if (dump_dies)
 		fputs(s, stderr);
+
+	die_map_add_string(cache, s);
 }
 
 #define MAX_FMT_BUFFER_SIZE 128
 
-static void process_fmt(const char *fmt, ...)
+static void process_fmt(struct die *cache, const char *fmt, ...)
 {
 	char buf[MAX_FMT_BUFFER_SIZE];
 	va_list args;
@@ -92,7 +94,7 @@ static void process_fmt(const char *fmt, ...)
 	if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf))
 		error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE");
 
-	process(buf);
+	process(cache, buf);
 	va_end(args);
 }
 
@@ -165,18 +167,28 @@ static char *get_fqn(Dwarf_Die *die)
 	return fqn;
 }
 
-static void process_fqn(Dwarf_Die *die)
+static void update_fqn(struct die *cache, Dwarf_Die *die)
+{
+	if (!cache->fqn)
+		cache->fqn = get_fqn(die) ?: "";
+}
+
+static void process_fqn(struct die *cache, Dwarf_Die *die)
 {
-	process(" ");
-	process(get_fqn(die) ?: "");
+	update_fqn(cache, die);
+	if (*cache->fqn)
+		process(cache, " ");
+	process(cache, cache->fqn);
 }
 
-#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute)                           \
-	static void process_##attribute##_attr(Dwarf_Die *die)              \
-	{                                                                   \
-		Dwarf_Word value;                                           \
-		if (get_udata_attr(die, DW_AT_##attribute, &value))         \
-			process_fmt(" " #attribute "(%" PRIu64 ")", value); \
+#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute)                          \
+	static void process_##attribute##_attr(struct die *cache,          \
+					       Dwarf_Die *die)             \
+	{                                                                  \
+		Dwarf_Word value;                                          \
+		if (get_udata_attr(die, DW_AT_##attribute, &value))        \
+			process_fmt(cache, " " #attribute "(%" PRIu64 ")", \
+				    value);                                \
 	}
 
 DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
@@ -188,8 +200,9 @@ bool match_all(Dwarf_Die *die)
 	return true;
 }
 
-int process_die_container(struct state *state, Dwarf_Die *die,
-			  die_callback_t func, die_match_callback_t match)
+int process_die_container(struct state *state, struct die *cache,
+			  Dwarf_Die *die, die_callback_t func,
+			  die_match_callback_t match)
 {
 	Dwarf_Die current;
 	int res;
@@ -198,7 +211,7 @@ int process_die_container(struct state *state, Dwarf_Die *die,
 	while (!res) {
 		if (match(&current)) {
 			/* <0 = error, 0 = continue, >0 = stop */
-			res = checkp(func(state, &current));
+			res = checkp(func(state, cache, &current));
 			if (res)
 				return res;
 		}
@@ -209,39 +222,78 @@ int process_die_container(struct state *state, Dwarf_Die *die,
 	return 0;
 }
 
-static int process_type(struct state *state, Dwarf_Die *die);
+static int process_type(struct state *state, struct die *parent,
+			Dwarf_Die *die);
 
-static void process_type_attr(struct state *state, Dwarf_Die *die)
+static void process_type_attr(struct state *state, struct die *cache,
+			      Dwarf_Die *die)
 {
 	Dwarf_Die type;
 
 	if (get_ref_die_attr(die, DW_AT_type, &type)) {
-		check(process_type(state, &type));
+		check(process_type(state, cache, &type));
 		return;
 	}
 
 	/* Compilers can omit DW_AT_type -- print out 'void' to clarify */
-	process("base_type void");
+	process(cache, "base_type void");
+}
+
+static void process_base_type(struct state *state, struct die *cache,
+			      Dwarf_Die *die)
+{
+	process(cache, "base_type");
+	process_fqn(cache, die);
+	process_byte_size_attr(cache, die);
+	process_encoding_attr(cache, die);
+	process_alignment_attr(cache, die);
 }
 
-static void process_base_type(struct state *state, Dwarf_Die *die)
+static void process_cached(struct state *state, struct die *cache,
+			   Dwarf_Die *die)
 {
-	process("base_type");
-	process_fqn(die);
-	process_byte_size_attr(die);
-	process_encoding_attr(die);
-	process_alignment_attr(die);
+	struct die_fragment *df;
+	Dwarf_Die child;
+
+	list_for_each_entry(df, &cache->fragments, list) {
+		switch (df->type) {
+		case FRAGMENT_STRING:
+			process(NULL, df->data.str);
+			break;
+		case FRAGMENT_DIE:
+			if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu),
+						(void *)df->data.addr, &child))
+				error("dwarf_die_addr_die failed");
+			check(process_type(state, NULL, &child));
+			break;
+		default:
+			error("empty die_fragment");
+		}
+	}
 }
 
-#define PROCESS_TYPE(type)                         \
-	case DW_TAG_##type##_type:                 \
-		process_##type##_type(state, die); \
+#define PROCESS_TYPE(type)                                \
+	case DW_TAG_##type##_type:                        \
+		process_##type##_type(state, cache, die); \
 		break;
 
-static int process_type(struct state *state, Dwarf_Die *die)
+static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 {
+	struct die *cache;
 	int tag = dwarf_tag(die);
 
+	/*
+	 * If we have the DIE already cached, use it instead of walking
+	 * through DWARF.
+	 */
+	cache = die_map_get(die, DIE_COMPLETE);
+
+	if (cache->state == DIE_COMPLETE) {
+		process_cached(state, cache, die);
+		die_map_add_die(parent, cache);
+		return 0;
+	}
+
 	switch (tag) {
 	PROCESS_TYPE(base)
 	default:
@@ -249,6 +301,11 @@ static int process_type(struct state *state, Dwarf_Die *die)
 		break;
 	}
 
+	/* Update cache state and append to the parent (if any) */
+	cache->tag = tag;
+	cache->state = DIE_COMPLETE;
+	die_map_add_die(parent, cache);
+
 	return 0;
 }
 
@@ -259,14 +316,15 @@ static void process_symbol(struct state *state, Dwarf_Die *die,
 			   die_callback_t process_func)
 {
 	debug("%s", state->sym->name);
-	check(process_func(state, die));
+	check(process_func(state, NULL, die));
 	if (dump_dies)
 		fputs("\n", stderr);
 }
 
-static int __process_subprogram(struct state *state, Dwarf_Die *die)
+static int __process_subprogram(struct state *state, struct die *cache,
+				Dwarf_Die *die)
 {
-	process("subprogram");
+	process(cache, "subprogram");
 	return 0;
 }
 
@@ -275,10 +333,11 @@ static void process_subprogram(struct state *state, Dwarf_Die *die)
 	process_symbol(state, die, __process_subprogram);
 }
 
-static int __process_variable(struct state *state, Dwarf_Die *die)
+static int __process_variable(struct state *state, struct die *cache,
+			      Dwarf_Die *die)
 {
-	process("variable ");
-	process_type_attr(state, die);
+	process(cache, "variable ");
+	process_type_attr(state, cache, die);
 	return 0;
 }
 
@@ -287,7 +346,8 @@ static void process_variable(struct state *state, Dwarf_Die *die)
 	process_symbol(state, die, __process_variable);
 }
 
-static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
+static int process_exported_symbols(struct state *unused, struct die *cache,
+				    Dwarf_Die *die)
 {
 	int tag = dwarf_tag(die);
 
@@ -297,7 +357,7 @@ static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
 	case DW_TAG_class_type:
 	case DW_TAG_structure_type:
 		return check(process_die_container(
-			NULL, die, process_exported_symbols, match_all));
+			NULL, cache, die, process_exported_symbols, match_all));
 
 	/* Possible exported symbols */
 	case DW_TAG_subprogram:
@@ -321,6 +381,6 @@ static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
 
 void process_cu(Dwarf_Die *cudie)
 {
-	check(process_die_container(NULL, cudie, process_exported_symbols,
+	check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
 				    match_all));
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index cd8bfe973a5cb..3809db840c06f 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -43,6 +43,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
 	debug("%s", name);
 	dbg = dwfl_module_getdwarf(mod, &dwbias);
 
+	/*
+	 * Look for exported symbols in each CU, follow the DIE tree, and add
+	 * the entries to die_map.
+	 */
 	do {
 		res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
 		if (res < 0)
@@ -53,6 +57,8 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
 		process_cu(&cudie);
 	} while (cu);
 
+	die_map_free();
+
 	return DWARF_CB_OK;
 }
 
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index cb9fd78a58dac..601f877bc8cab 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -87,6 +87,61 @@ void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
 void symbol_free(void);
 
+/*
+ * die.c
+ */
+
+enum die_state {
+	DIE_INCOMPLETE,
+	DIE_COMPLETE,
+	DIE_LAST = DIE_COMPLETE
+};
+
+enum die_fragment_type {
+	FRAGMENT_EMPTY,
+	FRAGMENT_STRING,
+	FRAGMENT_DIE
+};
+
+struct die_fragment {
+	enum die_fragment_type type;
+	union {
+		char *str;
+		uintptr_t addr;
+	} data;
+	struct list_head list;
+};
+
+#define CASE_CONST_TO_STR(name) \
+	case name:              \
+		return #name;
+
+static inline const char *die_state_name(enum die_state state)
+{
+	switch (state) {
+	CASE_CONST_TO_STR(DIE_INCOMPLETE)
+	CASE_CONST_TO_STR(DIE_COMPLETE)
+	}
+
+	error("unexpected die_state: %d", state);
+}
+
+struct die {
+	enum die_state state;
+	char *fqn;
+	int tag;
+	uintptr_t addr;
+	struct list_head fragments;
+	struct hlist_node hash;
+};
+
+int __die_map_get(uintptr_t addr, enum die_state state, struct die **res);
+struct die *die_map_get(Dwarf_Die *die, enum die_state state);
+void die_map_add_string(struct die *pd, const char *str);
+void die_map_add_linebreak(struct die *pd, int linebreak);
+void die_map_add_die(struct die *pd, struct die *child);
+void die_map_free(void);
+
 /*
  * dwarf.c
  */
@@ -96,12 +151,14 @@ struct state {
 	Dwarf_Die die;
 };
 
-typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
+typedef int (*die_callback_t)(struct state *state, struct die *cache,
+			      Dwarf_Die *die);
 typedef bool (*die_match_callback_t)(Dwarf_Die *die);
 bool match_all(Dwarf_Die *die);
 
-int process_die_container(struct state *state, Dwarf_Die *die,
-			  die_callback_t func, die_match_callback_t match);
+int process_die_container(struct state *state, struct die *cache,
+			  Dwarf_Die *die, die_callback_t func,
+			  die_match_callback_t match);
 
 void process_cu(Dwarf_Die *cudie);
 

From 06b8b036ab9c1e70a562705a398bcd271e0b5ebf Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:27 +0000
Subject: [PATCH 057/368] gendwarfksyms: Expand type modifiers and typedefs

Add support for expanding DWARF type modifiers, such as pointers,
const values etc., and typedefs. These types all have DW_AT_type
attribute pointing to the underlying type, and thus produce similar
output.

Also add linebreaks and indentation to debugging output to make it
more readable.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/die.c           | 12 +++++
 scripts/gendwarfksyms/dwarf.c         | 67 +++++++++++++++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.h |  5 ++
 3 files changed, 84 insertions(+)

diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index b7d900c6a9c89..0d70e02d02b5b 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -130,6 +130,18 @@ void die_map_add_string(struct die *cd, const char *str)
 	df->type = FRAGMENT_STRING;
 }
 
+void die_map_add_linebreak(struct die *cd, int linebreak)
+{
+	struct die_fragment *df;
+
+	if (!cd)
+		return;
+
+	df = append_item(cd);
+	df->data.linebreak = linebreak;
+	df->type = FRAGMENT_LINEBREAK;
+}
+
 void die_map_add_die(struct die *cd, struct die *child)
 {
 	struct die_fragment *df;
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index f40e23a547dae..3e08a32b7b16c 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -7,6 +7,17 @@
 #include <stdarg.h>
 #include "gendwarfksyms.h"
 
+static bool do_linebreak;
+static int indentation_level;
+
+/* Line breaks and indentation for pretty-printing */
+static void process_linebreak(struct die *cache, int n)
+{
+	indentation_level += n;
+	do_linebreak = true;
+	die_map_add_linebreak(cache, n);
+}
+
 #define DEFINE_GET_ATTR(attr, type)                                    \
 	static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \
 				      type *value)                     \
@@ -76,6 +87,12 @@ static void process(struct die *cache, const char *s)
 {
 	s = s ?: "<null>";
 
+	if (dump_dies && do_linebreak) {
+		fputs("\n", stderr);
+		for (int i = 0; i < indentation_level; i++)
+			fputs("  ", stderr);
+		do_linebreak = false;
+	}
 	if (dump_dies)
 		fputs(s, stderr);
 
@@ -239,6 +256,40 @@ static void process_type_attr(struct state *state, struct die *cache,
 	process(cache, "base_type void");
 }
 
+/* Container types with DW_AT_type */
+static void __process_type(struct state *state, struct die *cache,
+			   Dwarf_Die *die, const char *type)
+{
+	process(cache, type);
+	process_fqn(cache, die);
+	process(cache, " {");
+	process_linebreak(cache, 1);
+	process_type_attr(state, cache, die);
+	process_linebreak(cache, -1);
+	process(cache, "}");
+	process_byte_size_attr(cache, die);
+	process_alignment_attr(cache, die);
+}
+
+#define DEFINE_PROCESS_TYPE(type)                                            \
+	static void process_##type##_type(struct state *state,               \
+					  struct die *cache, Dwarf_Die *die) \
+	{                                                                    \
+		__process_type(state, cache, die, #type "_type");            \
+	}
+
+DEFINE_PROCESS_TYPE(atomic)
+DEFINE_PROCESS_TYPE(const)
+DEFINE_PROCESS_TYPE(immutable)
+DEFINE_PROCESS_TYPE(packed)
+DEFINE_PROCESS_TYPE(pointer)
+DEFINE_PROCESS_TYPE(reference)
+DEFINE_PROCESS_TYPE(restrict)
+DEFINE_PROCESS_TYPE(rvalue_reference)
+DEFINE_PROCESS_TYPE(shared)
+DEFINE_PROCESS_TYPE(volatile)
+DEFINE_PROCESS_TYPE(typedef)
+
 static void process_base_type(struct state *state, struct die *cache,
 			      Dwarf_Die *die)
 {
@@ -260,6 +311,9 @@ static void process_cached(struct state *state, struct die *cache,
 		case FRAGMENT_STRING:
 			process(NULL, df->data.str);
 			break;
+		case FRAGMENT_LINEBREAK:
+			process_linebreak(NULL, df->data.linebreak);
+			break;
 		case FRAGMENT_DIE:
 			if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu),
 						(void *)df->data.addr, &child))
@@ -295,7 +349,20 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	}
 
 	switch (tag) {
+	/* Type modifiers */
+	PROCESS_TYPE(atomic)
+	PROCESS_TYPE(const)
+	PROCESS_TYPE(immutable)
+	PROCESS_TYPE(packed)
+	PROCESS_TYPE(pointer)
+	PROCESS_TYPE(reference)
+	PROCESS_TYPE(restrict)
+	PROCESS_TYPE(rvalue_reference)
+	PROCESS_TYPE(shared)
+	PROCESS_TYPE(volatile)
+	/* Other types */
 	PROCESS_TYPE(base)
+	PROCESS_TYPE(typedef)
 	default:
 		debug("unimplemented type: %x", tag);
 		break;
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 601f877bc8cab..832d05b4fc1cf 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -59,6 +59,9 @@ extern int dump_dies;
 /* Error == negative values */
 #define checkp(expr) __check(expr, __res < 0)
 
+/* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */
+#define DW_TAG_typedef_type DW_TAG_typedef
+
 /*
  * symbols.c
  */
@@ -100,6 +103,7 @@ enum die_state {
 enum die_fragment_type {
 	FRAGMENT_EMPTY,
 	FRAGMENT_STRING,
+	FRAGMENT_LINEBREAK,
 	FRAGMENT_DIE
 };
 
@@ -107,6 +111,7 @@ struct die_fragment {
 	enum die_fragment_type type;
 	union {
 		char *str;
+		int linebreak;
 		uintptr_t addr;
 	} data;
 	struct list_head list;

From 220a0857f3a89e0dce3fc7c38d981df41c4537a7 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:28 +0000
Subject: [PATCH 058/368] gendwarfksyms: Expand subroutine_type

Add support for expanding DW_TAG_subroutine_type and the parameters
in DW_TAG_formal_parameter. Use this to also expand subprograms.

Example output with --dump-dies:

  subprogram (
    formal_parameter pointer_type {
      const_type {
        base_type char byte_size(1) encoding(6)
      }
    }
  )
  -> base_type unsigned long byte_size(8) encoding(7)

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c         | 84 ++++++++++++++++++++++++++-
 scripts/gendwarfksyms/gendwarfksyms.h |  4 ++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 3e08a32b7b16c..7d8a4eb6c387a 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -212,6 +212,15 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
 
+/* Match functions -- die_match_callback_t */
+#define DEFINE_MATCH(type)                                     \
+	static bool match_##type##_type(Dwarf_Die *die)        \
+	{                                                      \
+		return dwarf_tag(die) == DW_TAG_##type##_type; \
+	}
+
+DEFINE_MATCH(formal_parameter)
+
 bool match_all(Dwarf_Die *die)
 {
 	return true;
@@ -224,19 +233,28 @@ int process_die_container(struct state *state, struct die *cache,
 	Dwarf_Die current;
 	int res;
 
+	/* Track the first item in lists. */
+	if (state)
+		state->first_list_item = true;
+
 	res = checkp(dwarf_child(die, &current));
 	while (!res) {
 		if (match(&current)) {
 			/* <0 = error, 0 = continue, >0 = stop */
 			res = checkp(func(state, cache, &current));
 			if (res)
-				return res;
+				goto out;
 		}
 
 		res = checkp(dwarf_siblingof(&current, &current));
 	}
 
-	return 0;
+	res = 0;
+out:
+	if (state)
+		state->first_list_item = false;
+
+	return res;
 }
 
 static int process_type(struct state *state, struct die *parent,
@@ -256,6 +274,40 @@ static void process_type_attr(struct state *state, struct die *cache,
 	process(cache, "base_type void");
 }
 
+static void process_list_comma(struct state *state, struct die *cache)
+{
+	if (state->first_list_item) {
+		state->first_list_item = false;
+	} else {
+		process(cache, " ,");
+		process_linebreak(cache, 0);
+	}
+}
+
+/* Comma-separated with DW_AT_type */
+static void __process_list_type(struct state *state, struct die *cache,
+				Dwarf_Die *die, const char *type)
+{
+	const char *name = get_name_attr(die);
+
+	process_list_comma(state, cache);
+	process(cache, type);
+	process_type_attr(state, cache, die);
+	if (name) {
+		process(cache, " ");
+		process(cache, name);
+	}
+}
+
+#define DEFINE_PROCESS_LIST_TYPE(type)                                       \
+	static void process_##type##_type(struct state *state,               \
+					  struct die *cache, Dwarf_Die *die) \
+	{                                                                    \
+		__process_list_type(state, cache, die, #type " ");           \
+	}
+
+DEFINE_PROCESS_LIST_TYPE(formal_parameter)
+
 /* Container types with DW_AT_type */
 static void __process_type(struct state *state, struct die *cache,
 			   Dwarf_Die *die, const char *type)
@@ -290,6 +342,29 @@ DEFINE_PROCESS_TYPE(shared)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
+static void __process_subroutine_type(struct state *state, struct die *cache,
+				      Dwarf_Die *die, const char *type)
+{
+	process(cache, type);
+	process(cache, " (");
+	process_linebreak(cache, 1);
+	/* Parameters */
+	check(process_die_container(state, cache, die, process_type,
+				    match_formal_parameter_type));
+	process_linebreak(cache, -1);
+	process(cache, ")");
+	process_linebreak(cache, 0);
+	/* Return type */
+	process(cache, "-> ");
+	process_type_attr(state, cache, die);
+}
+
+static void process_subroutine_type(struct state *state, struct die *cache,
+				    Dwarf_Die *die)
+{
+	__process_subroutine_type(state, cache, die, "subroutine_type");
+}
+
 static void process_base_type(struct state *state, struct die *cache,
 			      Dwarf_Die *die)
 {
@@ -360,8 +435,11 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	PROCESS_TYPE(rvalue_reference)
 	PROCESS_TYPE(shared)
 	PROCESS_TYPE(volatile)
+	/* Subtypes */
+	PROCESS_TYPE(formal_parameter)
 	/* Other types */
 	PROCESS_TYPE(base)
+	PROCESS_TYPE(subroutine)
 	PROCESS_TYPE(typedef)
 	default:
 		debug("unimplemented type: %x", tag);
@@ -391,7 +469,7 @@ static void process_symbol(struct state *state, Dwarf_Die *die,
 static int __process_subprogram(struct state *state, struct die *cache,
 				Dwarf_Die *die)
 {
-	process(cache, "subprogram");
+	__process_subroutine_type(state, cache, die, "subprogram");
 	return 0;
 }
 
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 832d05b4fc1cf..0746a36f4924c 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -60,6 +60,7 @@ extern int dump_dies;
 #define checkp(expr) __check(expr, __res < 0)
 
 /* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */
+#define DW_TAG_formal_parameter_type DW_TAG_formal_parameter
 #define DW_TAG_typedef_type DW_TAG_typedef
 
 /*
@@ -154,6 +155,9 @@ void die_map_free(void);
 struct state {
 	struct symbol *sym;
 	Dwarf_Die die;
+
+	/* List expansion */
+	bool first_list_item;
 };
 
 typedef int (*die_callback_t)(struct state *state, struct die *cache,

From c772f1d1eaac608c083ee79fd5cfbe879958eb3e Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:29 +0000
Subject: [PATCH 059/368] gendwarfksyms: Expand array_type

Add support for expanding DW_TAG_array_type, and the subrange type
indicating array size.

Example source code:

  const char *s[34];

Output with --dump-dies:

  variable array_type[34] {
    pointer_type {
      const_type {
        base_type char byte_size(1) encoding(6)
      }
    } byte_size(8)
  }

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 7d8a4eb6c387a..46ce17b2459b8 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -220,6 +220,7 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
 	}
 
 DEFINE_MATCH(formal_parameter)
+DEFINE_MATCH(subrange)
 
 bool match_all(Dwarf_Die *die)
 {
@@ -342,6 +343,33 @@ DEFINE_PROCESS_TYPE(shared)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
+static void process_subrange_type(struct state *state, struct die *cache,
+				  Dwarf_Die *die)
+{
+	Dwarf_Word count = 0;
+
+	if (get_udata_attr(die, DW_AT_count, &count))
+		process_fmt(cache, "[%" PRIu64 "]", count);
+	else if (get_udata_attr(die, DW_AT_upper_bound, &count))
+		process_fmt(cache, "[%" PRIu64 "]", count + 1);
+	else
+		process(cache, "[]");
+}
+
+static void process_array_type(struct state *state, struct die *cache,
+			       Dwarf_Die *die)
+{
+	process(cache, "array_type");
+	/* Array size */
+	check(process_die_container(state, cache, die, process_type,
+				    match_subrange_type));
+	process(cache, " {");
+	process_linebreak(cache, 1);
+	process_type_attr(state, cache, die);
+	process_linebreak(cache, -1);
+	process(cache, "}");
+}
+
 static void __process_subroutine_type(struct state *state, struct die *cache,
 				      Dwarf_Die *die, const char *type)
 {
@@ -437,7 +465,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	PROCESS_TYPE(volatile)
 	/* Subtypes */
 	PROCESS_TYPE(formal_parameter)
+	PROCESS_TYPE(subrange)
 	/* Other types */
+	PROCESS_TYPE(array)
 	PROCESS_TYPE(base)
 	PROCESS_TYPE(subroutine)
 	PROCESS_TYPE(typedef)

From f6bb92455a5e5b2241d2e1f3e240c5fc036c55cb Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:30 +0000
Subject: [PATCH 060/368] gendwarfksyms: Expand structure types

Recursively expand DWARF structure types, i.e. structs, unions, and
enums. Also include relevant DWARF attributes in type strings, for
example to encode structure layout.

Example output with --dump-dies:

  subprogram (
    formal_parameter structure_type &str {
      member pointer_type {
        base_type u8 byte_size(1) encoding(7)
      } data_ptr data_member_location(0) ,
      member base_type usize byte_size(8) encoding(7) length data_member_location(8)
    } byte_size(16) alignment(8) msg
  )
  -> base_type void

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c         | 138 +++++++++++++++++++++++++-
 scripts/gendwarfksyms/gendwarfksyms.h |   5 +
 2 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 46ce17b2459b8..6ec1138c459f3 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -208,9 +208,14 @@ static void process_fqn(struct die *cache, Dwarf_Die *die)
 				    value);                                \
 	}
 
+DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(discr_value)
 
 /* Match functions -- die_match_callback_t */
 #define DEFINE_MATCH(type)                                     \
@@ -219,7 +224,9 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
 		return dwarf_tag(die) == DW_TAG_##type##_type; \
 	}
 
+DEFINE_MATCH(enumerator)
 DEFINE_MATCH(formal_parameter)
+DEFINE_MATCH(member)
 DEFINE_MATCH(subrange)
 
 bool match_all(Dwarf_Die *die)
@@ -298,6 +305,10 @@ static void __process_list_type(struct state *state, struct die *cache,
 		process(cache, " ");
 		process(cache, name);
 	}
+	process_accessibility_attr(cache, die);
+	process_bit_size_attr(cache, die);
+	process_data_bit_offset_attr(cache, die);
+	process_data_member_location_attr(cache, die);
 }
 
 #define DEFINE_PROCESS_LIST_TYPE(type)                                       \
@@ -308,6 +319,7 @@ static void __process_list_type(struct state *state, struct die *cache,
 	}
 
 DEFINE_PROCESS_LIST_TYPE(formal_parameter)
+DEFINE_PROCESS_LIST_TYPE(member)
 
 /* Container types with DW_AT_type */
 static void __process_type(struct state *state, struct die *cache,
@@ -340,6 +352,7 @@ DEFINE_PROCESS_TYPE(reference)
 DEFINE_PROCESS_TYPE(restrict)
 DEFINE_PROCESS_TYPE(rvalue_reference)
 DEFINE_PROCESS_TYPE(shared)
+DEFINE_PROCESS_TYPE(template_type_parameter)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
@@ -393,6 +406,107 @@ static void process_subroutine_type(struct state *state, struct die *cache,
 	__process_subroutine_type(state, cache, die, "subroutine_type");
 }
 
+static void process_variant_type(struct state *state, struct die *cache,
+				 Dwarf_Die *die)
+{
+	process_list_comma(state, cache);
+	process(cache, "variant {");
+	process_linebreak(cache, 1);
+	check(process_die_container(state, cache, die, process_type,
+				    match_member_type));
+	process_linebreak(cache, -1);
+	process(cache, "}");
+	process_discr_value_attr(cache, die);
+}
+
+static void process_variant_part_type(struct state *state, struct die *cache,
+				      Dwarf_Die *die)
+{
+	process_list_comma(state, cache);
+	process(cache, "variant_part {");
+	process_linebreak(cache, 1);
+	check(process_die_container(state, cache, die, process_type,
+				    match_all));
+	process_linebreak(cache, -1);
+	process(cache, "}");
+}
+
+static int ___process_structure_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die)
+{
+	switch (dwarf_tag(die)) {
+	case DW_TAG_member:
+	case DW_TAG_variant_part:
+		return check(process_type(state, cache, die));
+	case DW_TAG_class_type:
+	case DW_TAG_enumeration_type:
+	case DW_TAG_structure_type:
+	case DW_TAG_template_type_parameter:
+	case DW_TAG_union_type:
+	case DW_TAG_subprogram:
+		/* Skip non-member types, including member functions */
+		return 0;
+	default:
+		error("unexpected structure_type child: %x", dwarf_tag(die));
+	}
+}
+
+static void __process_structure_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die, const char *type,
+				     die_callback_t process_func,
+				     die_match_callback_t match_func)
+{
+	process(cache, type);
+	process_fqn(cache, die);
+	process(cache, " {");
+	process_linebreak(cache, 1);
+
+	check(process_die_container(state, cache, die, process_func,
+				    match_func));
+
+	process_linebreak(cache, -1);
+	process(cache, "}");
+
+	process_byte_size_attr(cache, die);
+	process_alignment_attr(cache, die);
+}
+
+#define DEFINE_PROCESS_STRUCTURE_TYPE(structure)                        \
+	static void process_##structure##_type(                         \
+		struct state *state, struct die *cache, Dwarf_Die *die) \
+	{                                                               \
+		__process_structure_type(state, cache, die,             \
+					 #structure "_type",            \
+					 ___process_structure_type,     \
+					 match_all);                    \
+	}
+
+DEFINE_PROCESS_STRUCTURE_TYPE(class)
+DEFINE_PROCESS_STRUCTURE_TYPE(structure)
+DEFINE_PROCESS_STRUCTURE_TYPE(union)
+
+static void process_enumerator_type(struct state *state, struct die *cache,
+				    Dwarf_Die *die)
+{
+	Dwarf_Word value;
+
+	process_list_comma(state, cache);
+	process(cache, "enumerator");
+	process_fqn(cache, die);
+
+	if (get_udata_attr(die, DW_AT_const_value, &value)) {
+		process(cache, " = ");
+		process_fmt(cache, "%" PRIu64, value);
+	}
+}
+
+static void process_enumeration_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die)
+{
+	__process_structure_type(state, cache, die, "enumeration_type",
+				 process_type, match_enumerator_type);
+}
+
 static void process_base_type(struct state *state, struct die *cache,
 			      Dwarf_Die *die)
 {
@@ -403,6 +517,16 @@ static void process_base_type(struct state *state, struct die *cache,
 	process_alignment_attr(cache, die);
 }
 
+static void process_unspecified_type(struct state *state, struct die *cache,
+				     Dwarf_Die *die)
+{
+	/*
+	 * These can be emitted for stand-alone assembly code, which means we
+	 * might run into them in vmlinux.o.
+	 */
+	process(cache, "unspecified_type");
+}
+
 static void process_cached(struct state *state, struct die *cache,
 			   Dwarf_Die *die)
 {
@@ -463,17 +587,27 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	PROCESS_TYPE(rvalue_reference)
 	PROCESS_TYPE(shared)
 	PROCESS_TYPE(volatile)
+	/* Container types */
+	PROCESS_TYPE(class)
+	PROCESS_TYPE(structure)
+	PROCESS_TYPE(union)
+	PROCESS_TYPE(enumeration)
 	/* Subtypes */
+	PROCESS_TYPE(enumerator)
 	PROCESS_TYPE(formal_parameter)
+	PROCESS_TYPE(member)
 	PROCESS_TYPE(subrange)
+	PROCESS_TYPE(template_type_parameter)
+	PROCESS_TYPE(variant)
+	PROCESS_TYPE(variant_part)
 	/* Other types */
 	PROCESS_TYPE(array)
 	PROCESS_TYPE(base)
 	PROCESS_TYPE(subroutine)
 	PROCESS_TYPE(typedef)
+	PROCESS_TYPE(unspecified)
 	default:
-		debug("unimplemented type: %x", tag);
-		break;
+		error("unexpected type: %x", tag);
 	}
 
 	/* Update cache state and append to the parent (if any) */
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 0746a36f4924c..1796f71b3a34d 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -60,8 +60,13 @@ extern int dump_dies;
 #define checkp(expr) __check(expr, __res < 0)
 
 /* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */
+#define DW_TAG_enumerator_type DW_TAG_enumerator
 #define DW_TAG_formal_parameter_type DW_TAG_formal_parameter
+#define DW_TAG_member_type DW_TAG_member
+#define DW_TAG_template_type_parameter_type DW_TAG_template_type_parameter
 #define DW_TAG_typedef_type DW_TAG_typedef
+#define DW_TAG_variant_part_type DW_TAG_variant_part
+#define DW_TAG_variant_type DW_TAG_variant
 
 /*
  * symbols.c

From f936c129fd4c3ce495768374ea48e5b736655046 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:31 +0000
Subject: [PATCH 061/368] gendwarfksyms: Limit structure expansion

Expand each structure type only once per exported symbol. This
is necessary to support self-referential structures, which would
otherwise result in infinite recursion, and it's sufficient for
catching ABI changes.

Types defined in .c files are opaque to external users and thus
cannot affect the ABI. Consider type definitions in .c files to
be declarations to prevent opaque types from changing symbol
versions.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/Makefile        |   1 +
 scripts/gendwarfksyms/cache.c         |  51 +++++++++++
 scripts/gendwarfksyms/dwarf.c         | 125 ++++++++++++++++++++++++--
 scripts/gendwarfksyms/gendwarfksyms.h |  46 ++++++++++
 4 files changed, 215 insertions(+), 8 deletions(-)
 create mode 100644 scripts/gendwarfksyms/cache.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index c0d4ce50fc27e..c06145d84df84 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -2,6 +2,7 @@
 hostprogs-always-y += gendwarfksyms
 
 gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c
new file mode 100644
index 0000000000000..c9c19b86a686f
--- /dev/null
+++ b/scripts/gendwarfksyms/cache.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+struct cache_item {
+	unsigned long key;
+	int value;
+	struct hlist_node hash;
+};
+
+void cache_set(struct cache *cache, unsigned long key, int value)
+{
+	struct cache_item *ci;
+
+	ci = xmalloc(sizeof(struct cache_item));
+	ci->key = key;
+	ci->value = value;
+	hash_add(cache->cache, &ci->hash, hash_32(key));
+}
+
+int cache_get(struct cache *cache, unsigned long key)
+{
+	struct cache_item *ci;
+
+	hash_for_each_possible(cache->cache, ci, hash, hash_32(key)) {
+		if (ci->key == key)
+			return ci->value;
+	}
+
+	return -1;
+}
+
+void cache_init(struct cache *cache)
+{
+	hash_init(cache->cache);
+}
+
+void cache_free(struct cache *cache)
+{
+	struct hlist_node *tmp;
+	struct cache_item *ci;
+
+	hash_for_each_safe(cache->cache, ci, tmp, hash) {
+		free(ci);
+	}
+
+	hash_init(cache->cache);
+}
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 6ec1138c459f3..6b30e45a4e820 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -27,6 +27,7 @@ static void process_linebreak(struct die *cache, int n)
 		       !dwarf_form##attr(&da, value);                  \
 	}
 
+DEFINE_GET_ATTR(flag, bool)
 DEFINE_GET_ATTR(udata, Dwarf_Word)
 
 static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
@@ -80,6 +81,55 @@ static bool match_export_symbol(struct state *state, Dwarf_Die *die)
 	return !!state->sym;
 }
 
+/* DW_AT_decl_file -> 1 if the file is a .c source file, 0 otherwise */
+static struct cache srcfile_cache;
+
+static bool is_definition_private(Dwarf_Die *die)
+{
+	Dwarf_Word filenum;
+	Dwarf_Files *files;
+	Dwarf_Die cudie;
+	const char *s;
+	int res;
+
+	/*
+	 * Definitions in .c files cannot change the public ABI,
+	 * so consider them private.
+	 */
+	if (!get_udata_attr(die, DW_AT_decl_file, &filenum))
+		return false;
+
+	res = cache_get(&srcfile_cache, filenum);
+	if (res >= 0)
+		return !!res;
+
+	if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL))
+		error("dwarf_cu_die failed: '%s'", dwarf_errmsg(-1));
+
+	if (dwarf_getsrcfiles(&cudie, &files, NULL))
+		error("dwarf_getsrcfiles failed: '%s'", dwarf_errmsg(-1));
+
+	s = dwarf_filesrc(files, filenum, NULL, NULL);
+	if (!s)
+		error("dwarf_filesrc failed: '%s'", dwarf_errmsg(-1));
+
+	s = strrchr(s, '.');
+	res = s && !strcmp(s, ".c");
+	cache_set(&srcfile_cache, filenum, res);
+
+	return !!res;
+}
+
+static bool is_kabi_definition(Dwarf_Die *die)
+{
+	bool value;
+
+	if (get_flag_attr(die, DW_AT_declaration, &value) && value)
+		return false;
+
+	return !is_definition_private(die);
+}
+
 /*
  * Type string processing
  */
@@ -456,19 +506,27 @@ static void __process_structure_type(struct state *state, struct die *cache,
 				     die_callback_t process_func,
 				     die_match_callback_t match_func)
 {
+	bool expand;
+
 	process(cache, type);
 	process_fqn(cache, die);
 	process(cache, " {");
 	process_linebreak(cache, 1);
 
-	check(process_die_container(state, cache, die, process_func,
-				    match_func));
+	expand = state->expand.expand && is_kabi_definition(die);
+
+	if (expand) {
+		check(process_die_container(state, cache, die, process_func,
+					    match_func));
+	}
 
 	process_linebreak(cache, -1);
 	process(cache, "}");
 
-	process_byte_size_attr(cache, die);
-	process_alignment_attr(cache, die);
+	if (expand) {
+		process_byte_size_attr(cache, die);
+		process_alignment_attr(cache, die);
+	}
 }
 
 #define DEFINE_PROCESS_STRUCTURE_TYPE(structure)                        \
@@ -553,6 +611,30 @@ static void process_cached(struct state *state, struct die *cache,
 	}
 }
 
+static void state_init(struct state *state)
+{
+	state->expand.expand = true;
+	cache_init(&state->expansion_cache);
+}
+
+static void expansion_state_restore(struct expansion_state *state,
+				    struct expansion_state *saved)
+{
+	state->expand = saved->expand;
+}
+
+static void expansion_state_save(struct expansion_state *state,
+				 struct expansion_state *saved)
+{
+	expansion_state_restore(saved, state);
+}
+
+static bool is_expanded_type(int tag)
+{
+	return tag == DW_TAG_class_type || tag == DW_TAG_structure_type ||
+	       tag == DW_TAG_union_type || tag == DW_TAG_enumeration_type;
+}
+
 #define PROCESS_TYPE(type)                                \
 	case DW_TAG_##type##_type:                        \
 		process_##type##_type(state, cache, die); \
@@ -560,18 +642,39 @@ static void process_cached(struct state *state, struct die *cache,
 
 static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 {
+	enum die_state want_state = DIE_COMPLETE;
 	struct die *cache;
+	struct expansion_state saved;
 	int tag = dwarf_tag(die);
 
+	expansion_state_save(&state->expand, &saved);
+
 	/*
-	 * If we have the DIE already cached, use it instead of walking
+	 * Structures and enumeration types are expanded only once per
+	 * exported symbol. This is sufficient for detecting ABI changes
+	 * within the structure.
+	 */
+	if (is_expanded_type(tag)) {
+		if (cache_was_expanded(&state->expansion_cache, die->addr))
+			state->expand.expand = false;
+
+		if (state->expand.expand)
+			cache_mark_expanded(&state->expansion_cache, die->addr);
+		else
+			want_state = DIE_UNEXPANDED;
+	}
+
+	/*
+	 * If we have want_state already cached, use it instead of walking
 	 * through DWARF.
 	 */
-	cache = die_map_get(die, DIE_COMPLETE);
+	cache = die_map_get(die, want_state);
 
-	if (cache->state == DIE_COMPLETE) {
+	if (cache->state == want_state) {
 		process_cached(state, cache, die);
 		die_map_add_die(parent, cache);
+
+		expansion_state_restore(&state->expand, &saved);
 		return 0;
 	}
 
@@ -612,9 +715,10 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 
 	/* Update cache state and append to the parent (if any) */
 	cache->tag = tag;
-	cache->state = DIE_COMPLETE;
+	cache->state = want_state;
 	die_map_add_die(parent, cache);
 
+	expansion_state_restore(&state->expand, &saved);
 	return 0;
 }
 
@@ -676,11 +780,14 @@ static int process_exported_symbols(struct state *unused, struct die *cache,
 		if (!match_export_symbol(&state, die))
 			return 0;
 
+		state_init(&state);
+
 		if (tag == DW_TAG_subprogram)
 			process_subprogram(&state, &state.die);
 		else
 			process_variable(&state, &state.die);
 
+		cache_free(&state.expansion_cache);
 		return 0;
 	}
 	default:
@@ -692,4 +799,6 @@ void process_cu(Dwarf_Die *cudie)
 {
 	check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
 				    match_all));
+
+	cache_free(&srcfile_cache);
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 1796f71b3a34d..941c4134da8ed 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -102,6 +102,7 @@ void symbol_free(void);
 
 enum die_state {
 	DIE_INCOMPLETE,
+	DIE_UNEXPANDED,
 	DIE_COMPLETE,
 	DIE_LAST = DIE_COMPLETE
 };
@@ -131,6 +132,7 @@ static inline const char *die_state_name(enum die_state state)
 {
 	switch (state) {
 	CASE_CONST_TO_STR(DIE_INCOMPLETE)
+	CASE_CONST_TO_STR(DIE_UNEXPANDED)
 	CASE_CONST_TO_STR(DIE_COMPLETE)
 	}
 
@@ -153,16 +155,60 @@ void die_map_add_linebreak(struct die *pd, int linebreak);
 void die_map_add_die(struct die *pd, struct die *child);
 void die_map_free(void);
 
+/*
+ * cache.c
+ */
+
+#define CACHE_HASH_BITS 10
+
+/* A cache for addresses we've already seen. */
+struct cache {
+	HASHTABLE_DECLARE(cache, 1 << CACHE_HASH_BITS);
+};
+
+void cache_set(struct cache *cache, unsigned long key, int value);
+int cache_get(struct cache *cache, unsigned long key);
+void cache_init(struct cache *cache);
+void cache_free(struct cache *cache);
+
+static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr)
+{
+	cache_set(cache, addr, 1);
+}
+
+static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr)
+{
+	return cache_get(cache, addr) == 1;
+}
+
+static inline void cache_mark_expanded(struct cache *cache, void *addr)
+{
+	__cache_mark_expanded(cache, (uintptr_t)addr);
+}
+
+static inline bool cache_was_expanded(struct cache *cache, void *addr)
+{
+	return __cache_was_expanded(cache, (uintptr_t)addr);
+}
+
 /*
  * dwarf.c
  */
 
+struct expansion_state {
+	bool expand;
+};
+
 struct state {
 	struct symbol *sym;
 	Dwarf_Die die;
 
 	/* List expansion */
 	bool first_list_item;
+
+	/* Structure expansion */
+	struct expansion_state expand;
+	struct cache expansion_cache;
 };
 
 typedef int (*die_callback_t)(struct state *state, struct die *cache,

From d2ffdc1c9a0ee71b30e25fbe3e2a37bf4c146085 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:32 +0000
Subject: [PATCH 062/368] gendwarfksyms: Add die_map debugging

Debugging the DWARF processing can be somewhat challenging, so add
more detailed debugging output for die_map operations. Add the
--dump-die-map flag, which adds color-coded tags to the output for
die_map changes.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c         | 15 +++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.c |  7 +++++++
 scripts/gendwarfksyms/gendwarfksyms.h | 13 +++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 6b30e45a4e820..364ff4892d5c8 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -146,6 +146,8 @@ static void process(struct die *cache, const char *s)
 	if (dump_dies)
 		fputs(s, stderr);
 
+	if (cache)
+		die_debug_r("cache %p string '%s'", cache, s);
 	die_map_add_string(cache, s);
 }
 
@@ -594,6 +596,8 @@ static void process_cached(struct state *state, struct die *cache,
 	list_for_each_entry(df, &cache->fragments, list) {
 		switch (df->type) {
 		case FRAGMENT_STRING:
+			die_debug_b("cache %p STRING '%s'", cache,
+				    df->data.str);
 			process(NULL, df->data.str);
 			break;
 		case FRAGMENT_LINEBREAK:
@@ -603,6 +607,8 @@ static void process_cached(struct state *state, struct die *cache,
 			if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu),
 						(void *)df->data.addr, &child))
 				error("dwarf_die_addr_die failed");
+			die_debug_b("cache %p DIE addr %" PRIxPTR " tag %x",
+				    cache, df->data.addr, dwarf_tag(&child));
 			check(process_type(state, NULL, &child));
 			break;
 		default:
@@ -671,6 +677,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	cache = die_map_get(die, want_state);
 
 	if (cache->state == want_state) {
+		die_debug_g("cached addr %p tag %x -- %s", die->addr, tag,
+			    die_state_name(cache->state));
+
 		process_cached(state, cache, die);
 		die_map_add_die(parent, cache);
 
@@ -678,6 +687,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 		return 0;
 	}
 
+	die_debug_g("addr %p tag %x -- %s -> %s", die->addr, tag,
+		    die_state_name(cache->state), die_state_name(want_state));
+
 	switch (tag) {
 	/* Type modifiers */
 	PROCESS_TYPE(atomic)
@@ -713,6 +725,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 		error("unexpected type: %x", tag);
 	}
 
+	die_debug_r("parent %p cache %p die addr %p tag %x", parent, cache,
+		    die->addr, tag);
+
 	/* Update cache state and append to the parent (if any) */
 	cache->tag = tag;
 	cache->state = want_state;
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index 3809db840c06f..bf282e33e00ce 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -19,6 +19,8 @@
 int debug;
 /* Dump DIE contents */
 int dump_dies;
+/* Print debugging information about die_map changes */
+int dump_die_map;
 
 static void usage(void)
 {
@@ -26,6 +28,7 @@ static void usage(void)
 	      "Options:\n"
 	      "  -d, --debug          Print debugging information\n"
 	      "      --dump-dies      Dump DWARF DIE contents\n"
+	      "      --dump-die-map   Print debugging information about die_map changes\n"
 	      "  -h, --help           Print this message\n"
 	      "\n",
 	      stderr);
@@ -75,6 +78,7 @@ int main(int argc, char **argv)
 	static const struct option opts[] = {
 		{ "debug", 0, NULL, 'd' },
 		{ "dump-dies", 0, &dump_dies, 1 },
+		{ "dump-die-map", 0, &dump_die_map, 1 },
 		{ "help", 0, NULL, 'h' },
 		{ 0, 0, NULL, 0 }
 	};
@@ -95,6 +99,9 @@ int main(int argc, char **argv)
 		}
 	}
 
+	if (dump_die_map)
+		dump_dies = 1;
+
 	if (optind >= argc) {
 		usage();
 		error("no input files?");
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 941c4134da8ed..251832dac5997 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -21,6 +21,7 @@
  */
 extern int debug;
 extern int dump_dies;
+extern int dump_die_map;
 
 /*
  * Output helpers
@@ -43,6 +44,18 @@ extern int dump_dies;
 		exit(1);                                     \
 	} while (0)
 
+#define __die_debug(color, format, ...)                                 \
+	do {                                                            \
+		if (dump_dies && dump_die_map)                          \
+			fprintf(stderr,                                 \
+				"\033[" #color "m<" format ">\033[39m", \
+				__VA_ARGS__);                           \
+	} while (0)
+
+#define die_debug_r(format, ...) __die_debug(91, format, __VA_ARGS__)
+#define die_debug_g(format, ...) __die_debug(92, format, __VA_ARGS__)
+#define die_debug_b(format, ...) __die_debug(94, format, __VA_ARGS__)
+
 /*
  * Error handling helpers
  */

From ab4439981f8549b013f4ea0b274b7c77c88ab4bc Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:33 +0000
Subject: [PATCH 063/368] gendwarfksyms: Add symtypes output

Add support for producing genksyms-style symtypes files. Process
die_map to find the longest expansion for each type, and use symtypes
references in type definitions. The basic file format is similar to
genksyms, with two notable exceptions:

  1. Type names with spaces (common with Rust) in references are
     wrapped in single quotes. E.g.:

     s#'core::result::Result<u8, core::num::error::ParseIntError>'

  2. The actual type definition is the simple parsed DWARF format we
     output with --dump-dies, not the preprocessed C-style format
     genksyms produces.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/Makefile        |   1 +
 scripts/gendwarfksyms/die.c           |  11 +
 scripts/gendwarfksyms/dwarf.c         |   1 +
 scripts/gendwarfksyms/gendwarfksyms.c |  33 ++-
 scripts/gendwarfksyms/gendwarfksyms.h |  19 ++
 scripts/gendwarfksyms/symbols.c       |   4 +-
 scripts/gendwarfksyms/types.c         | 363 ++++++++++++++++++++++++++
 7 files changed, 429 insertions(+), 3 deletions(-)
 create mode 100644 scripts/gendwarfksyms/types.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index c06145d84df84..6540282dc7466 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -6,5 +6,6 @@ gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
+gendwarfksyms-objs += types.o
 
 HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index 0d70e02d02b5b..66bd4c9bc9528 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -22,6 +22,7 @@ static inline unsigned int die_hash(uintptr_t addr, enum die_state state)
 static void init_die(struct die *cd)
 {
 	cd->state = DIE_INCOMPLETE;
+	cd->mapped = false;
 	cd->fqn = NULL;
 	cd->tag = -1;
 	cd->addr = 0;
@@ -83,6 +84,16 @@ static void reset_die(struct die *cd)
 	init_die(cd);
 }
 
+void die_map_for_each(die_map_callback_t func, void *arg)
+{
+	struct hlist_node *tmp;
+	struct die *cd;
+
+	hash_for_each_safe(die_map, cd, tmp, hash) {
+		func(cd, arg);
+	}
+}
+
 void die_map_free(void)
 {
 	struct hlist_node *tmp;
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 364ff4892d5c8..a9966a23167ab 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -745,6 +745,7 @@ static void process_symbol(struct state *state, Dwarf_Die *die,
 {
 	debug("%s", state->sym->name);
 	check(process_func(state, NULL, die));
+	state->sym->state = SYMBOL_MAPPED;
 	if (dump_dies)
 		fputs("\n", stderr);
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index bf282e33e00ce..1d30f42cbd143 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -21,6 +21,11 @@ int debug;
 int dump_dies;
 /* Print debugging information about die_map changes */
 int dump_die_map;
+/* Print out type strings (i.e. type_map) */
+int dump_types;
+/* Write a symtypes file */
+int symtypes;
+static const char *symtypes_file;
 
 static void usage(void)
 {
@@ -29,6 +34,8 @@ static void usage(void)
 	      "  -d, --debug          Print debugging information\n"
 	      "      --dump-dies      Dump DWARF DIE contents\n"
 	      "      --dump-die-map   Print debugging information about die_map changes\n"
+	      "      --dump-types     Dump type strings\n"
+	      "  -T, --symtypes file  Write a symtypes file\n"
 	      "  -h, --help           Print this message\n"
 	      "\n",
 	      stderr);
@@ -41,6 +48,7 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
 	Dwarf_Die cudie;
 	Dwarf_CU *cu = NULL;
 	Dwarf *dbg;
+	FILE *symfile = arg;
 	int res;
 
 	debug("%s", name);
@@ -60,6 +68,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
 		process_cu(&cudie);
 	} while (cu);
 
+	/*
+	 * Use die_map to expand type strings and write them to `symfile`.
+	 */
+	generate_symtypes(symfile);
 	die_map_free();
 
 	return DWARF_CB_OK;
@@ -72,6 +84,7 @@ static const Dwfl_Callbacks callbacks = {
 
 int main(int argc, char **argv)
 {
+	FILE *symfile = NULL;
 	unsigned int n;
 	int opt;
 
@@ -79,17 +92,23 @@ int main(int argc, char **argv)
 		{ "debug", 0, NULL, 'd' },
 		{ "dump-dies", 0, &dump_dies, 1 },
 		{ "dump-die-map", 0, &dump_die_map, 1 },
+		{ "dump-types", 0, &dump_types, 1 },
+		{ "symtypes", 1, NULL, 'T' },
 		{ "help", 0, NULL, 'h' },
 		{ 0, 0, NULL, 0 }
 	};
 
-	while ((opt = getopt_long(argc, argv, "dh", opts, NULL)) != EOF) {
+	while ((opt = getopt_long(argc, argv, "dT:h", opts, NULL)) != EOF) {
 		switch (opt) {
 		case 0:
 			break;
 		case 'd':
 			debug = 1;
 			break;
+		case 'T':
+			symtypes = 1;
+			symtypes_file = optarg;
+			break;
 		case 'h':
 			usage();
 			return 0;
@@ -109,6 +128,13 @@ int main(int argc, char **argv)
 
 	symbol_read_exports(stdin);
 
+	if (symtypes_file) {
+		symfile = fopen(symtypes_file, "w");
+		if (!symfile)
+			error("fopen failed for '%s': %s", symtypes_file,
+			      strerror(errno));
+	}
+
 	for (n = optind; n < argc; n++) {
 		Dwfl *dwfl;
 		int fd;
@@ -131,12 +157,15 @@ int main(int argc, char **argv)
 
 		dwfl_report_end(dwfl, NULL, NULL);
 
-		if (dwfl_getmodules(dwfl, &process_module, NULL, 0))
+		if (dwfl_getmodules(dwfl, &process_module, symfile, 0))
 			error("dwfl_getmodules failed for '%s'", argv[n]);
 
 		dwfl_end(dwfl);
 	}
 
+	if (symfile)
+		check(fclose(symfile));
+
 	symbol_free();
 
 	return 0;
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 251832dac5997..98d5b2315f218 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -22,6 +22,8 @@
 extern int debug;
 extern int dump_dies;
 extern int dump_die_map;
+extern int dump_types;
+extern int symtypes;
 
 /*
  * Output helpers
@@ -90,6 +92,11 @@ static inline unsigned int addr_hash(uintptr_t addr)
 	return hash_ptr((const void *)addr);
 }
 
+enum symbol_state {
+	SYMBOL_UNPROCESSED,
+	SYMBOL_MAPPED,
+};
+
 struct symbol_addr {
 	uint32_t section;
 	Elf64_Addr address;
@@ -100,6 +107,8 @@ struct symbol {
 	struct symbol_addr addr;
 	struct hlist_node addr_hash;
 	struct hlist_node name_hash;
+	enum symbol_state state;
+	uintptr_t die_addr;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
@@ -154,6 +163,7 @@ static inline const char *die_state_name(enum die_state state)
 
 struct die {
 	enum die_state state;
+	bool mapped;
 	char *fqn;
 	int tag;
 	uintptr_t addr;
@@ -161,10 +171,13 @@ struct die {
 	struct hlist_node hash;
 };
 
+typedef void (*die_map_callback_t)(struct die *, void *arg);
+
 int __die_map_get(uintptr_t addr, enum die_state state, struct die **res);
 struct die *die_map_get(Dwarf_Die *die, enum die_state state);
 void die_map_add_string(struct die *pd, const char *str);
 void die_map_add_linebreak(struct die *pd, int linebreak);
+void die_map_for_each(die_map_callback_t func, void *arg);
 void die_map_add_die(struct die *pd, struct die *child);
 void die_map_free(void);
 
@@ -235,4 +248,10 @@ int process_die_container(struct state *state, struct die *cache,
 
 void process_cu(Dwarf_Die *cudie);
 
+/*
+ * types.c
+ */
+
+void generate_symtypes(FILE *file);
+
 #endif /* __GENDWARFKSYMS_H */
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index 98febb524dd57..0d2ce7284a53c 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -92,6 +92,7 @@ void symbol_read_exports(FILE *file)
 		sym = xcalloc(1, sizeof(struct symbol));
 		sym->name = name;
 		sym->addr.section = SHN_UNDEF;
+		sym->state = SYMBOL_UNPROCESSED;
 
 		hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
 		++nsym;
@@ -107,7 +108,8 @@ static void get_symbol(struct symbol *sym, void *arg)
 {
 	struct symbol **res = arg;
 
-	*res = sym;
+	if (sym->state == SYMBOL_UNPROCESSED)
+		*res = sym;
 }
 
 struct symbol *symbol_get(const char *name)
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
new file mode 100644
index 0000000000000..21d7a34228eba
--- /dev/null
+++ b/scripts/gendwarfksyms/types.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#define _GNU_SOURCE
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "gendwarfksyms.h"
+
+static struct cache expansion_cache;
+
+/*
+ * A simple linked list of shared or owned strings to avoid copying strings
+ * around when not necessary.
+ */
+struct type_list_entry {
+	const char *str;
+	void *owned;
+	struct list_head list;
+};
+
+static void type_list_free(struct list_head *list)
+{
+	struct type_list_entry *entry;
+	struct type_list_entry *tmp;
+
+	list_for_each_entry_safe(entry, tmp, list, list) {
+		if (entry->owned)
+			free(entry->owned);
+		free(entry);
+	}
+
+	INIT_LIST_HEAD(list);
+}
+
+static int type_list_append(struct list_head *list, const char *s, void *owned)
+{
+	struct type_list_entry *entry;
+
+	if (!s)
+		return 0;
+
+	entry = xmalloc(sizeof(struct type_list_entry));
+	entry->str = s;
+	entry->owned = owned;
+	list_add_tail(&entry->list, list);
+
+	return strlen(entry->str);
+}
+
+static void type_list_write(struct list_head *list, FILE *file)
+{
+	struct type_list_entry *entry;
+
+	list_for_each_entry(entry, list, list) {
+		if (entry->str)
+			checkp(fputs(entry->str, file));
+	}
+}
+
+/*
+ * An expanded type string in symtypes format.
+ */
+struct type_expansion {
+	char *name;
+	size_t len;
+	struct list_head expanded;
+	struct hlist_node hash;
+};
+
+static void type_expansion_init(struct type_expansion *type)
+{
+	type->name = NULL;
+	type->len = 0;
+	INIT_LIST_HEAD(&type->expanded);
+}
+
+static inline void type_expansion_free(struct type_expansion *type)
+{
+	free(type->name);
+	type->name = NULL;
+	type->len = 0;
+	type_list_free(&type->expanded);
+}
+
+static void type_expansion_append(struct type_expansion *type, const char *s,
+				  void *owned)
+{
+	type->len += type_list_append(&type->expanded, s, owned);
+}
+
+/*
+ * type_map -- the longest expansions for each type.
+ *
+ * const char *name -> struct type_expansion *
+ */
+#define TYPE_HASH_BITS 12
+static HASHTABLE_DEFINE(type_map, 1 << TYPE_HASH_BITS);
+
+static int type_map_get(const char *name, struct type_expansion **res)
+{
+	struct type_expansion *e;
+
+	hash_for_each_possible(type_map, e, hash, hash_str(name)) {
+		if (!strcmp(name, e->name)) {
+			*res = e;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static void type_map_add(const char *name, struct type_expansion *type)
+{
+	struct type_expansion *e;
+
+	if (type_map_get(name, &e)) {
+		e = xmalloc(sizeof(struct type_expansion));
+		type_expansion_init(e);
+		e->name = xstrdup(name);
+
+		hash_add(type_map, &e->hash, hash_str(e->name));
+
+		if (dump_types)
+			debug("adding %s", e->name);
+	} else {
+		/* Use the longest available expansion */
+		if (type->len <= e->len)
+			return;
+
+		type_list_free(&e->expanded);
+
+		if (dump_types)
+			debug("replacing %s", e->name);
+	}
+
+	/* Take ownership of type->expanded */
+	list_replace_init(&type->expanded, &e->expanded);
+	e->len = type->len;
+
+	if (dump_types) {
+		checkp(fputs(e->name, stderr));
+		checkp(fputs(" ", stderr));
+		type_list_write(&e->expanded, stderr);
+		checkp(fputs("\n", stderr));
+	}
+}
+
+static void type_map_write(FILE *file)
+{
+	struct type_expansion *e;
+	struct hlist_node *tmp;
+
+	if (!file)
+		return;
+
+	hash_for_each_safe(type_map, e, tmp, hash) {
+		checkp(fputs(e->name, file));
+		checkp(fputs(" ", file));
+		type_list_write(&e->expanded, file);
+		checkp(fputs("\n", file));
+	}
+}
+
+static void type_map_free(void)
+{
+	struct type_expansion *e;
+	struct hlist_node *tmp;
+
+	hash_for_each_safe(type_map, e, tmp, hash) {
+		type_expansion_free(e);
+		free(e);
+	}
+
+	hash_init(type_map);
+}
+
+/*
+ * Type reference format: <prefix>#<name>, where prefix:
+ * 	s -> structure
+ * 	u -> union
+ * 	e -> enum
+ * 	t -> typedef
+ *
+ * Names with spaces are additionally wrapped in single quotes.
+ */
+static char get_type_prefix(int tag)
+{
+	switch (tag) {
+	case DW_TAG_class_type:
+	case DW_TAG_structure_type:
+		return 's';
+	case DW_TAG_union_type:
+		return 'u';
+	case DW_TAG_enumeration_type:
+		return 'e';
+	case DW_TAG_typedef_type:
+		return 't';
+	default:
+		return 0;
+	}
+}
+
+static char *get_type_name(struct die *cache)
+{
+	const char *quote;
+	char prefix;
+	char *name;
+
+	if (cache->state == DIE_INCOMPLETE) {
+		warn("found incomplete cache entry: %p", cache);
+		return NULL;
+	}
+	if (!cache->fqn || !*cache->fqn)
+		return NULL;
+
+	prefix = get_type_prefix(cache->tag);
+	if (!prefix)
+		return NULL;
+
+	/* Wrap names with spaces in single quotes */
+	quote = strstr(cache->fqn, " ") ? "'" : "";
+
+	/* <prefix>#<type_name>\0 */
+	if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0)
+		error("asprintf failed for '%s'", cache->fqn);
+
+	return name;
+}
+
+static void __type_expand(struct die *cache, struct type_expansion *type,
+			  bool recursive);
+
+static void type_expand_child(struct die *cache, struct type_expansion *type,
+			      bool recursive)
+{
+	struct type_expansion child;
+	char *name;
+
+	name = get_type_name(cache);
+	if (!name) {
+		__type_expand(cache, type, recursive);
+		return;
+	}
+
+	if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) {
+		__cache_mark_expanded(&expansion_cache, cache->addr);
+		type_expansion_init(&child);
+		__type_expand(cache, &child, true);
+		type_map_add(name, &child);
+		type_expansion_free(&child);
+	}
+
+	type_expansion_append(type, name, name);
+}
+
+static void __type_expand(struct die *cache, struct type_expansion *type,
+			  bool recursive)
+{
+	struct die_fragment *df;
+	struct die *child;
+
+	list_for_each_entry(df, &cache->fragments, list) {
+		switch (df->type) {
+		case FRAGMENT_STRING:
+			type_expansion_append(type, df->data.str, NULL);
+			break;
+		case FRAGMENT_DIE:
+			/* Use a complete die_map expansion if available */
+			if (__die_map_get(df->data.addr, DIE_COMPLETE,
+					  &child) &&
+			    __die_map_get(df->data.addr, DIE_UNEXPANDED,
+					  &child))
+				error("unknown child: %" PRIxPTR,
+				      df->data.addr);
+
+			type_expand_child(child, type, recursive);
+			break;
+		case FRAGMENT_LINEBREAK:
+			/*
+			 * Keep whitespace in the symtypes format, but avoid
+			 * repeated spaces.
+			 */
+			if (list_is_last(&df->list, &cache->fragments) ||
+			    list_next_entry(df, list)->type !=
+				    FRAGMENT_LINEBREAK)
+				type_expansion_append(type, " ", NULL);
+			break;
+		default:
+			error("empty die_fragment in %p", cache);
+		}
+	}
+}
+
+static void type_expand(struct die *cache, struct type_expansion *type,
+			bool recursive)
+{
+	type_expansion_init(type);
+	__type_expand(cache, type, recursive);
+	cache_free(&expansion_cache);
+}
+
+static void expand_type(struct die *cache, void *arg)
+{
+	struct type_expansion type;
+	char *name;
+
+	if (cache->mapped)
+		return;
+
+	cache->mapped = true;
+
+	/*
+	 * Skip unexpanded die_map entries if there's a complete
+	 * expansion available for this DIE.
+	 */
+	if (cache->state == DIE_UNEXPANDED &&
+	    !__die_map_get(cache->addr, DIE_COMPLETE, &cache)) {
+		if (cache->mapped)
+			return;
+
+		cache->mapped = true;
+	}
+
+	name = get_type_name(cache);
+	if (!name)
+		return;
+
+	debug("%s", name);
+	type_expand(cache, &type, true);
+	type_map_add(name, &type);
+
+	type_expansion_free(&type);
+	free(name);
+}
+
+void generate_symtypes(FILE *file)
+{
+	cache_init(&expansion_cache);
+
+	/*
+	 * die_map processing:
+	 *
+	 *   1. die_map contains all types referenced in exported symbol
+	 *      signatures, but can contain duplicates just like the original
+	 *      DWARF, and some references may not be fully expanded depending
+	 *      on how far we processed the DIE tree for that specific symbol.
+	 *
+	 *      For each die_map entry, find the longest available expansion,
+	 *      and add it to type_map.
+	 */
+	die_map_for_each(expand_type, NULL);
+
+	/*
+	 *   2. If a symtypes file is requested, write type_map contents to
+	 *      the file.
+	 */
+	type_map_write(file);
+	type_map_free();
+}

From 71378888018833a1cdcbf72f1e95d7c010542d8b Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:34 +0000
Subject: [PATCH 064/368] gendwarfksyms: Add symbol versioning

Calculate symbol versions from the fully expanded type strings in
type_map, and output the versions in a genksyms-compatible format.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/Makefile        |   2 +-
 scripts/gendwarfksyms/dwarf.c         |  25 +++++-
 scripts/gendwarfksyms/gendwarfksyms.c |  10 ++-
 scripts/gendwarfksyms/gendwarfksyms.h |  13 ++-
 scripts/gendwarfksyms/symbols.c       |  53 +++++++++++
 scripts/gendwarfksyms/types.c         | 122 +++++++++++++++++++++++++-
 6 files changed, 216 insertions(+), 9 deletions(-)

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index 6540282dc7466..e889b958957b6 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -8,4 +8,4 @@ gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
 gendwarfksyms-objs += types.o
 
-HOSTLDLIBS_gendwarfksyms := -ldw -lelf
+HOSTLDLIBS_gendwarfksyms := -ldw -lelf -lz
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index a9966a23167ab..bdf899d607072 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -740,12 +740,33 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 /*
  * Exported symbol processing
  */
+static struct die *get_symbol_cache(struct state *state, Dwarf_Die *die)
+{
+	struct die *cache;
+
+	cache = die_map_get(die, DIE_SYMBOL);
+
+	if (cache->state != DIE_INCOMPLETE)
+		return NULL; /* We already processed a symbol for this DIE */
+
+	cache->tag = dwarf_tag(die);
+	return cache;
+}
+
 static void process_symbol(struct state *state, Dwarf_Die *die,
 			   die_callback_t process_func)
 {
+	struct die *cache;
+
+	symbol_set_die(state->sym, die);
+
+	cache = get_symbol_cache(state, die);
+	if (!cache)
+		return;
+
 	debug("%s", state->sym->name);
-	check(process_func(state, NULL, die));
-	state->sym->state = SYMBOL_MAPPED;
+	check(process_func(state, cache, die));
+	cache->state = DIE_SYMBOL;
 	if (dump_dies)
 		fputs("\n", stderr);
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index 1d30f42cbd143..b0e13c37c6c2c 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -23,6 +23,8 @@ int dump_dies;
 int dump_die_map;
 /* Print out type strings (i.e. type_map) */
 int dump_types;
+/* Print out expanded type strings used for symbol versions */
+int dump_versions;
 /* Write a symtypes file */
 int symtypes;
 static const char *symtypes_file;
@@ -35,6 +37,7 @@ static void usage(void)
 	      "      --dump-dies      Dump DWARF DIE contents\n"
 	      "      --dump-die-map   Print debugging information about die_map changes\n"
 	      "      --dump-types     Dump type strings\n"
+	      "      --dump-versions  Dump expanded type strings used for symbol versions\n"
 	      "  -T, --symtypes file  Write a symtypes file\n"
 	      "  -h, --help           Print this message\n"
 	      "\n",
@@ -69,9 +72,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
 	} while (cu);
 
 	/*
-	 * Use die_map to expand type strings and write them to `symfile`.
+	 * Use die_map to expand type strings, write them to `symfile`, and
+	 * calculate symbol versions.
 	 */
-	generate_symtypes(symfile);
+	generate_symtypes_and_versions(symfile);
 	die_map_free();
 
 	return DWARF_CB_OK;
@@ -93,6 +97,7 @@ int main(int argc, char **argv)
 		{ "dump-dies", 0, &dump_dies, 1 },
 		{ "dump-die-map", 0, &dump_die_map, 1 },
 		{ "dump-types", 0, &dump_types, 1 },
+		{ "dump-versions", 0, &dump_versions, 1 },
 		{ "symtypes", 1, NULL, 'T' },
 		{ "help", 0, NULL, 'h' },
 		{ 0, 0, NULL, 0 }
@@ -166,6 +171,7 @@ int main(int argc, char **argv)
 	if (symfile)
 		check(fclose(symfile));
 
+	symbol_print_versions();
 	symbol_free();
 
 	return 0;
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 98d5b2315f218..203534abcd354 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -23,6 +23,7 @@ extern int debug;
 extern int dump_dies;
 extern int dump_die_map;
 extern int dump_types;
+extern int dump_versions;
 extern int symtypes;
 
 /*
@@ -95,6 +96,7 @@ static inline unsigned int addr_hash(uintptr_t addr)
 enum symbol_state {
 	SYMBOL_UNPROCESSED,
 	SYMBOL_MAPPED,
+	SYMBOL_PROCESSED
 };
 
 struct symbol_addr {
@@ -109,6 +111,7 @@ struct symbol {
 	struct hlist_node name_hash;
 	enum symbol_state state;
 	uintptr_t die_addr;
+	unsigned long crc;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
@@ -116,6 +119,10 @@ typedef void (*symbol_callback_t)(struct symbol *, void *arg);
 void symbol_read_exports(FILE *file);
 void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
+void symbol_set_die(struct symbol *sym, Dwarf_Die *die);
+void symbol_set_crc(struct symbol *sym, unsigned long crc);
+void symbol_for_each(symbol_callback_t func, void *arg);
+void symbol_print_versions(void);
 void symbol_free(void);
 
 /*
@@ -126,7 +133,8 @@ enum die_state {
 	DIE_INCOMPLETE,
 	DIE_UNEXPANDED,
 	DIE_COMPLETE,
-	DIE_LAST = DIE_COMPLETE
+	DIE_SYMBOL,
+	DIE_LAST = DIE_SYMBOL
 };
 
 enum die_fragment_type {
@@ -156,6 +164,7 @@ static inline const char *die_state_name(enum die_state state)
 	CASE_CONST_TO_STR(DIE_INCOMPLETE)
 	CASE_CONST_TO_STR(DIE_UNEXPANDED)
 	CASE_CONST_TO_STR(DIE_COMPLETE)
+	CASE_CONST_TO_STR(DIE_SYMBOL)
 	}
 
 	error("unexpected die_state: %d", state);
@@ -252,6 +261,6 @@ void process_cu(Dwarf_Die *cudie);
  * types.c
  */
 
-void generate_symtypes(FILE *file);
+void generate_symtypes_and_versions(FILE *file);
 
 #endif /* __GENDWARFKSYMS_H */
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index 0d2ce7284a53c..4c499ba6c86de 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -66,6 +66,36 @@ static unsigned int for_each(const char *name, symbol_callback_t func,
 	return 0;
 }
 
+static void set_crc(struct symbol *sym, void *data)
+{
+	unsigned long *crc = data;
+
+	if (sym->state == SYMBOL_PROCESSED && sym->crc != *crc)
+		warn("overriding version for symbol %s (crc %lx vs. %lx)",
+		     sym->name, sym->crc, *crc);
+
+	sym->state = SYMBOL_PROCESSED;
+	sym->crc = *crc;
+}
+
+void symbol_set_crc(struct symbol *sym, unsigned long crc)
+{
+	if (for_each(sym->name, set_crc, &crc) == 0)
+		error("no matching symbols: '%s'", sym->name);
+}
+
+static void set_die(struct symbol *sym, void *data)
+{
+	sym->die_addr = (uintptr_t)((Dwarf_Die *)data)->addr;
+	sym->state = SYMBOL_MAPPED;
+}
+
+void symbol_set_die(struct symbol *sym, Dwarf_Die *die)
+{
+	if (for_each(sym->name, set_die, die) == 0)
+		error("no matching symbols: '%s'", sym->name);
+}
+
 static bool is_exported(const char *name)
 {
 	return for_each(name, NULL, NULL) > 0;
@@ -120,6 +150,16 @@ struct symbol *symbol_get(const char *name)
 	return sym;
 }
 
+void symbol_for_each(symbol_callback_t func, void *arg)
+{
+	struct hlist_node *tmp;
+	struct symbol *sym;
+
+	hash_for_each_safe(symbol_names, sym, tmp, name_hash) {
+		func(sym, arg);
+	}
+}
+
 typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
 				      Elf32_Word xndx, void *arg);
 
@@ -246,6 +286,19 @@ void symbol_read_symtab(int fd)
 	elf_for_each_global(fd, elf_set_symbol_addr, NULL);
 }
 
+void symbol_print_versions(void)
+{
+	struct hlist_node *tmp;
+	struct symbol *sym;
+
+	hash_for_each_safe(symbol_names, sym, tmp, name_hash) {
+		if (sym->state != SYMBOL_PROCESSED)
+			warn("no information for symbol %s", sym->name);
+
+		printf("#SYMVER %s 0x%08lx\n", sym->name, sym->crc);
+	}
+}
+
 void symbol_free(void)
 {
 	struct hlist_node *tmp;
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
index 21d7a34228eba..6c03265f4d107 100644
--- a/scripts/gendwarfksyms/types.c
+++ b/scripts/gendwarfksyms/types.c
@@ -6,6 +6,7 @@
 #define _GNU_SOURCE
 #include <inttypes.h>
 #include <stdio.h>
+#include <zlib.h>
 
 #include "gendwarfksyms.h"
 
@@ -178,6 +179,33 @@ static void type_map_free(void)
 	hash_init(type_map);
 }
 
+/*
+ * CRC for a type, with an optional fully expanded type string for
+ * debugging.
+ */
+struct version {
+	struct type_expansion type;
+	unsigned long crc;
+};
+
+static void version_init(struct version *version)
+{
+	version->crc = crc32(0, NULL, 0);
+	type_expansion_init(&version->type);
+}
+
+static void version_free(struct version *version)
+{
+	type_expansion_free(&version->type);
+}
+
+static void version_add(struct version *version, const char *s)
+{
+	version->crc = crc32(version->crc, (void *)s, strlen(s));
+	if (dump_versions)
+		type_expansion_append(&version->type, s, NULL);
+}
+
 /*
  * Type reference format: <prefix>#<name>, where prefix:
  * 	s -> structure
@@ -187,6 +215,12 @@ static void type_map_free(void)
  *
  * Names with spaces are additionally wrapped in single quotes.
  */
+static inline bool is_type_prefix(const char *s)
+{
+	return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') &&
+	       s[1] == '#';
+}
+
 static char get_type_prefix(int tag)
 {
 	switch (tag) {
@@ -214,6 +248,8 @@ static char *get_type_name(struct die *cache)
 		warn("found incomplete cache entry: %p", cache);
 		return NULL;
 	}
+	if (cache->state == DIE_SYMBOL)
+		return NULL;
 	if (!cache->fqn || !*cache->fqn)
 		return NULL;
 
@@ -231,6 +267,39 @@ static char *get_type_name(struct die *cache)
 	return name;
 }
 
+static void __calculate_version(struct version *version, struct list_head *list)
+{
+	struct type_list_entry *entry;
+	struct type_expansion *e;
+
+	/* Calculate a CRC over an expanded type string */
+	list_for_each_entry(entry, list, list) {
+		if (is_type_prefix(entry->str)) {
+			check(type_map_get(entry->str, &e));
+
+			/*
+			 * It's sufficient to expand each type reference just
+			 * once to detect changes.
+			 */
+			if (cache_was_expanded(&expansion_cache, e)) {
+				version_add(version, entry->str);
+			} else {
+				cache_mark_expanded(&expansion_cache, e);
+				__calculate_version(version, &e->expanded);
+			}
+		} else {
+			version_add(version, entry->str);
+		}
+	}
+}
+
+static void calculate_version(struct version *version, struct list_head *list)
+{
+	version_init(version);
+	__calculate_version(version, list);
+	cache_free(&expansion_cache);
+}
+
 static void __type_expand(struct die *cache, struct type_expansion *type,
 			  bool recursive);
 
@@ -337,7 +406,49 @@ static void expand_type(struct die *cache, void *arg)
 	free(name);
 }
 
-void generate_symtypes(FILE *file)
+static void expand_symbol(struct symbol *sym, void *arg)
+{
+	struct type_expansion type;
+	struct version version;
+	struct die *cache;
+
+	/*
+	 * No need to expand again unless we want a symtypes file entry
+	 * for the symbol. Note that this means `sym` has the same address
+	 * as another symbol that was already processed.
+	 */
+	if (!symtypes && sym->state == SYMBOL_PROCESSED)
+		return;
+
+	if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache))
+		return; /* We'll warn about missing CRCs later. */
+
+	type_expand(cache, &type, false);
+
+	/* If the symbol already has a version, don't calculate it again. */
+	if (sym->state != SYMBOL_PROCESSED) {
+		calculate_version(&version, &type.expanded);
+		symbol_set_crc(sym, version.crc);
+		debug("%s = %lx", sym->name, version.crc);
+
+		if (dump_versions) {
+			checkp(fputs(sym->name, stderr));
+			checkp(fputs(" ", stderr));
+			type_list_write(&version.type.expanded, stderr);
+			checkp(fputs("\n", stderr));
+		}
+
+		version_free(&version);
+	}
+
+	/* These aren't needed in type_map unless we want a symtypes file. */
+	if (symtypes)
+		type_map_add(sym->name, &type);
+
+	type_expansion_free(&type);
+}
+
+void generate_symtypes_and_versions(FILE *file)
 {
 	cache_init(&expansion_cache);
 
@@ -355,7 +466,14 @@ void generate_symtypes(FILE *file)
 	die_map_for_each(expand_type, NULL);
 
 	/*
-	 *   2. If a symtypes file is requested, write type_map contents to
+	 *   2. For each exported symbol, expand the die_map type, and use
+	 *      type_map expansions to calculate a symbol version from the
+	 *      fully expanded type string.
+	 */
+	symbol_for_each(expand_symbol, NULL);
+
+	/*
+	 *   3. If a symtypes file is requested, write type_map contents to
 	 *      the file.
 	 */
 	type_map_write(file);

From 936cf61c3ef5d6dad714d6c01a85704027dddeb9 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:35 +0000
Subject: [PATCH 065/368] gendwarfksyms: Add support for kABI rules

Distributions that want to maintain a stable kABI need the ability
to make ABI compatible changes to kernel without affecting symbol
versions, either because of LTS updates or backports.

With genksyms, developers would typically hide these changes from
version calculation with #ifndef __GENKSYMS__, which would result
in the symbol version not changing even though the actual type has
changed.  When we process precompiled object files, this isn't an
option.

To support this use case, add a --stable command line flag that
gates kABI stability features that are not needed in mainline
kernels, but can be useful for distributions, and add support for
kABI rules, which can be used to restrict gendwarfksyms output.

The rules are specified as a set of null-terminated strings stored
in the .discard.gendwarfksyms.kabi_rules section. Each rule consists
of four strings as follows:

  "version\0type\0target\0value"

The version string ensures the structure can be changed in a
backwards compatible way. The type string indicates the type of the
rule, and target and value strings contain rule-specific data.

Initially support three simple rules:

  1. Declaration-only types

     A type declaration can change into a full definition when
     additional includes are pulled in to the TU, which changes the
     versions of any symbol that references the type. Add support
     for defining declaration-only types whose definition is not
     expanded during versioning.

  2. Ignored enumerators

     It's possible to add new enum fields without changing the ABI,
     but as the fields are included in symbol versioning, this would
     change the versions. Add support for ignoring specific fields.

  3. Overridden enumerator values

     Add support for overriding enumerator values when calculating
     versions. This may be needed when the last field of the enum
     is used as a sentinel and new fields must be added before it.

Add examples for using the rules under the examples/ directory.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/Makefile           |   1 +
 scripts/gendwarfksyms/dwarf.c            |  25 +-
 scripts/gendwarfksyms/examples/kabi.h    |  70 +++++
 scripts/gendwarfksyms/examples/kabi_ex.c |  14 +
 scripts/gendwarfksyms/examples/kabi_ex.h |  64 +++++
 scripts/gendwarfksyms/gendwarfksyms.c    |  11 +-
 scripts/gendwarfksyms/gendwarfksyms.h    |  14 +
 scripts/gendwarfksyms/kabi.c             | 336 +++++++++++++++++++++++
 8 files changed, 531 insertions(+), 4 deletions(-)
 create mode 100644 scripts/gendwarfksyms/examples/kabi.h
 create mode 100644 scripts/gendwarfksyms/examples/kabi_ex.c
 create mode 100644 scripts/gendwarfksyms/examples/kabi_ex.h
 create mode 100644 scripts/gendwarfksyms/kabi.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index e889b958957b6..6334c7d3c4d58 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -5,6 +5,7 @@ gendwarfksyms-objs += gendwarfksyms.o
 gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
+gendwarfksyms-objs += kabi.o
 gendwarfksyms-objs += symbols.o
 gendwarfksyms-objs += types.o
 
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index bdf899d607072..17f7e6b9a7ff2 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -120,13 +120,16 @@ static bool is_definition_private(Dwarf_Die *die)
 	return !!res;
 }
 
-static bool is_kabi_definition(Dwarf_Die *die)
+static bool is_kabi_definition(struct die *cache, Dwarf_Die *die)
 {
 	bool value;
 
 	if (get_flag_attr(die, DW_AT_declaration, &value) && value)
 		return false;
 
+	if (kabi_is_declonly(cache->fqn))
+		return false;
+
 	return !is_definition_private(die);
 }
 
@@ -515,9 +518,10 @@ static void __process_structure_type(struct state *state, struct die *cache,
 	process(cache, " {");
 	process_linebreak(cache, 1);
 
-	expand = state->expand.expand && is_kabi_definition(die);
+	expand = state->expand.expand && is_kabi_definition(cache, die);
 
 	if (expand) {
+		state->expand.current_fqn = cache->fqn;
 		check(process_die_container(state, cache, die, process_func,
 					    match_func));
 	}
@@ -548,13 +552,26 @@ DEFINE_PROCESS_STRUCTURE_TYPE(union)
 static void process_enumerator_type(struct state *state, struct die *cache,
 				    Dwarf_Die *die)
 {
+	bool overridden = false;
 	Dwarf_Word value;
 
+	if (stable) {
+		/* Get the fqn before we process anything */
+		update_fqn(cache, die);
+
+		if (kabi_is_enumerator_ignored(state->expand.current_fqn,
+					       cache->fqn))
+			return;
+
+		overridden = kabi_get_enumerator_value(
+			state->expand.current_fqn, cache->fqn, &value);
+	}
+
 	process_list_comma(state, cache);
 	process(cache, "enumerator");
 	process_fqn(cache, die);
 
-	if (get_udata_attr(die, DW_AT_const_value, &value)) {
+	if (overridden || get_udata_attr(die, DW_AT_const_value, &value)) {
 		process(cache, " = ");
 		process_fmt(cache, "%" PRIu64, value);
 	}
@@ -620,6 +637,7 @@ static void process_cached(struct state *state, struct die *cache,
 static void state_init(struct state *state)
 {
 	state->expand.expand = true;
+	state->expand.current_fqn = NULL;
 	cache_init(&state->expansion_cache);
 }
 
@@ -627,6 +645,7 @@ static void expansion_state_restore(struct expansion_state *state,
 				    struct expansion_state *saved)
 {
 	state->expand = saved->expand;
+	state->current_fqn = saved->current_fqn;
 }
 
 static void expansion_state_save(struct expansion_state *state,
diff --git a/scripts/gendwarfksyms/examples/kabi.h b/scripts/gendwarfksyms/examples/kabi.h
new file mode 100644
index 0000000000000..fcd0300e5b58f
--- /dev/null
+++ b/scripts/gendwarfksyms/examples/kabi.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Example macros for maintaining kABI stability.
+ *
+ * This file is based on android_kabi.h, which has the following notice:
+ *
+ * Heavily influenced by rh_kabi.h which came from the RHEL/CENTOS kernel
+ * and was:
+ *	Copyright (c) 2014 Don Zickus
+ *	Copyright (c) 2015-2018 Jiri Benc
+ *	Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa
+ *	Copyright (c) 2016-2018 Prarit Bhargava
+ *	Copyright (c) 2017 Paolo Abeni, Larry Woodman
+ */
+
+#ifndef __KABI_H__
+#define __KABI_H__
+
+/* Kernel macros for userspace testing. */
+#ifndef __aligned
+#define __aligned(x) __attribute__((__aligned__(x)))
+#endif
+#ifndef __used
+#define __used __attribute__((__used__))
+#endif
+#ifndef __section
+#define __section(section) __attribute__((__section__(section)))
+#endif
+#ifndef __PASTE
+#define ___PASTE(a, b) a##b
+#define __PASTE(a, b) ___PASTE(a, b)
+#endif
+#ifndef __stringify
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+#endif
+
+#define __KABI_RULE(hint, target, value)                             \
+	static const char __PASTE(__gendwarfksyms_rule_,             \
+				  __COUNTER__)[] __used __aligned(1) \
+		__section(".discard.gendwarfksyms.kabi_rules") =     \
+			"1\0" #hint "\0" #target "\0" #value
+
+/*
+ * KABI_DECLONLY(fqn)
+ *   Treat the struct/union/enum fqn as a declaration, i.e. even if
+ *   a definition is available, don't expand the contents.
+ */
+#define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, )
+
+/*
+ * KABI_ENUMERATOR_IGNORE(fqn, field)
+ *   When expanding enum fqn, skip the provided field. This makes it
+ *   possible to hide added enum fields from versioning.
+ */
+#define KABI_ENUMERATOR_IGNORE(fqn, field) \
+	__KABI_RULE(enumerator_ignore, fqn field, )
+
+/*
+ * KABI_ENUMERATOR_VALUE(fqn, field, value)
+ *   When expanding enum fqn, use the provided value for the
+ *   specified field. This makes it possible to override enumerator
+ *   values when calculating versions.
+ */
+#define KABI_ENUMERATOR_VALUE(fqn, field, value) \
+	__KABI_RULE(enumerator_value, fqn field, value)
+
+#endif /* __KABI_H__ */
diff --git a/scripts/gendwarfksyms/examples/kabi_ex.c b/scripts/gendwarfksyms/examples/kabi_ex.c
new file mode 100644
index 0000000000000..799552ea6679b
--- /dev/null
+++ b/scripts/gendwarfksyms/examples/kabi_ex.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kabi_ex.c
+ *
+ * Copyright (C) 2024 Google LLC
+ *
+ * Examples for kABI stability features with --stable. See kabi_ex.h
+ * for details.
+ */
+
+#include "kabi_ex.h"
+
+struct s e0;
+enum e e1;
diff --git a/scripts/gendwarfksyms/examples/kabi_ex.h b/scripts/gendwarfksyms/examples/kabi_ex.h
new file mode 100644
index 0000000000000..fca1e07c78e2b
--- /dev/null
+++ b/scripts/gendwarfksyms/examples/kabi_ex.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kabi_ex.h
+ *
+ * Copyright (C) 2024 Google LLC
+ *
+ * Examples for kABI stability features with --stable.
+ */
+
+/*
+ * The comments below each example contain the expected gendwarfksyms
+ * output, which can be verified using LLVM's FileCheck tool:
+ *
+ * https://llvm.org/docs/CommandGuide/FileCheck.html
+ *
+ * Usage:
+ *
+ * $ gcc -g -c examples/kabi_ex.c -o examples/kabi_ex.o
+ *
+ * $ nm examples/kabi_ex.o | awk '{ print $NF }' | \
+ * 	./gendwarfksyms --stable --dump-dies \
+ * 		examples/kabi_ex.o 2>&1 >/dev/null | \
+ * 	FileCheck examples/kabi_ex.h --check-prefix=STABLE
+ */
+
+#ifndef __KABI_EX_H__
+#define __KABI_EX_H__
+
+#include "kabi.h"
+
+/*
+ * Example: kABI rules
+ */
+
+struct s {
+	int a;
+};
+
+KABI_DECLONLY(s);
+
+/*
+ * STABLE:      variable structure_type s {
+ * STABLE-NEXT: }
+ */
+
+enum e {
+	A,
+	B,
+	C,
+	D,
+};
+
+KABI_ENUMERATOR_IGNORE(e, B);
+KABI_ENUMERATOR_IGNORE(e, C);
+KABI_ENUMERATOR_VALUE(e, D, 123456789);
+
+/*
+ * STABLE:      variable enumeration_type e {
+ * STABLE-NEXT:   enumerator A = 0 ,
+ * STABLE-NEXT:   enumerator D = 123456789
+ * STABLE-NEXT: } byte_size(4)
+ */
+
+#endif /* __KABI_EX_H__ */
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index b0e13c37c6c2c..08ae61eb327ea 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -25,6 +25,8 @@ int dump_die_map;
 int dump_types;
 /* Print out expanded type strings used for symbol versions */
 int dump_versions;
+/* Support kABI stability features */
+int stable;
 /* Write a symtypes file */
 int symtypes;
 static const char *symtypes_file;
@@ -38,6 +40,7 @@ static void usage(void)
 	      "      --dump-die-map   Print debugging information about die_map changes\n"
 	      "      --dump-types     Dump type strings\n"
 	      "      --dump-versions  Dump expanded type strings used for symbol versions\n"
+	      "  -s, --stable         Support kABI stability features\n"
 	      "  -T, --symtypes file  Write a symtypes file\n"
 	      "  -h, --help           Print this message\n"
 	      "\n",
@@ -98,18 +101,22 @@ int main(int argc, char **argv)
 		{ "dump-die-map", 0, &dump_die_map, 1 },
 		{ "dump-types", 0, &dump_types, 1 },
 		{ "dump-versions", 0, &dump_versions, 1 },
+		{ "stable", 0, NULL, 's' },
 		{ "symtypes", 1, NULL, 'T' },
 		{ "help", 0, NULL, 'h' },
 		{ 0, 0, NULL, 0 }
 	};
 
-	while ((opt = getopt_long(argc, argv, "dT:h", opts, NULL)) != EOF) {
+	while ((opt = getopt_long(argc, argv, "dsT:h", opts, NULL)) != EOF) {
 		switch (opt) {
 		case 0:
 			break;
 		case 'd':
 			debug = 1;
 			break;
+		case 's':
+			stable = 1;
+			break;
 		case 'T':
 			symtypes = 1;
 			symtypes_file = optarg;
@@ -150,6 +157,7 @@ int main(int argc, char **argv)
 			      strerror(errno));
 
 		symbol_read_symtab(fd);
+		kabi_read_rules(fd);
 
 		dwfl = dwfl_begin(&callbacks);
 		if (!dwfl)
@@ -166,6 +174,7 @@ int main(int argc, char **argv)
 			error("dwfl_getmodules failed for '%s'", argv[n]);
 
 		dwfl_end(dwfl);
+		kabi_free();
 	}
 
 	if (symfile)
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 203534abcd354..c0207ca10e198 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -24,6 +24,7 @@ extern int dump_dies;
 extern int dump_die_map;
 extern int dump_types;
 extern int dump_versions;
+extern int stable;
 extern int symtypes;
 
 /*
@@ -232,6 +233,7 @@ static inline bool cache_was_expanded(struct cache *cache, void *addr)
 
 struct expansion_state {
 	bool expand;
+	const char *current_fqn;
 };
 
 struct state {
@@ -263,4 +265,16 @@ void process_cu(Dwarf_Die *cudie);
 
 void generate_symtypes_and_versions(FILE *file);
 
+/*
+ * kabi.c
+ */
+
+bool kabi_is_enumerator_ignored(const char *fqn, const char *field);
+bool kabi_get_enumerator_value(const char *fqn, const char *field,
+			       unsigned long *value);
+bool kabi_is_declonly(const char *fqn);
+
+void kabi_read_rules(int fd);
+void kabi_free(void);
+
 #endif /* __GENDWARFKSYMS_H */
diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c
new file mode 100644
index 0000000000000..66f01fcd16079
--- /dev/null
+++ b/scripts/gendwarfksyms/kabi.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+
+#include "gendwarfksyms.h"
+
+#define KABI_RULE_SECTION ".discard.gendwarfksyms.kabi_rules"
+#define KABI_RULE_VERSION "1"
+
+/*
+ * The rule section consists of four null-terminated strings per
+ * entry:
+ *
+ *   1. version
+ *      Entry format version. Must match KABI_RULE_VERSION.
+ *
+ *   2. type
+ *      Type of the kABI rule. Must be one of the tags defined below.
+ *
+ *   3. target
+ *      Rule-dependent target, typically the fully qualified name of
+ *      the target DIE.
+ *
+ *   4. value
+ *      Rule-dependent value.
+ */
+#define KABI_RULE_MIN_ENTRY_SIZE                                  \
+	(/* version\0 */ 2 + /* type\0 */ 2 + /* target\0 */ 1 + \
+	 /* value\0 */ 1)
+#define KABI_RULE_EMPTY_VALUE ""
+
+/*
+ * Rule: declonly
+ * - For the struct/enum/union in the target field, treat it as a
+ *   declaration only even if a definition is available.
+ */
+#define KABI_RULE_TAG_DECLONLY "declonly"
+
+/*
+ * Rule: enumerator_ignore
+ * - For the enum_field in the target field, ignore the enumerator.
+ */
+#define KABI_RULE_TAG_ENUMERATOR_IGNORE "enumerator_ignore"
+
+/*
+ * Rule: enumerator_value
+ * - For the fqn_field in the target field, set the value to the
+ *   unsigned integer in the value field.
+ */
+#define KABI_RULE_TAG_ENUMERATOR_VALUE "enumerator_value"
+
+enum kabi_rule_type {
+	KABI_RULE_TYPE_UNKNOWN,
+	KABI_RULE_TYPE_DECLONLY,
+	KABI_RULE_TYPE_ENUMERATOR_IGNORE,
+	KABI_RULE_TYPE_ENUMERATOR_VALUE,
+};
+
+#define RULE_HASH_BITS 7
+
+struct rule {
+	enum kabi_rule_type type;
+	const char *target;
+	const char *value;
+	struct hlist_node hash;
+};
+
+/* { type, target } -> struct rule */
+static HASHTABLE_DEFINE(rules, 1 << RULE_HASH_BITS);
+
+static inline unsigned int rule_values_hash(enum kabi_rule_type type,
+					    const char *target)
+{
+	return hash_32(type) ^ hash_str(target);
+}
+
+static inline unsigned int rule_hash(const struct rule *rule)
+{
+	return rule_values_hash(rule->type, rule->target);
+}
+
+static inline const char *get_rule_field(const char **pos, ssize_t *left)
+{
+	const char *start = *pos;
+	size_t len;
+
+	if (*left <= 0)
+		error("unexpected end of kABI rules");
+
+	len = strnlen(start, *left) + 1;
+	*pos += len;
+	*left -= len;
+
+	return start;
+}
+
+void kabi_read_rules(int fd)
+{
+	GElf_Shdr shdr_mem;
+	GElf_Shdr *shdr;
+	Elf_Data *rule_data = NULL;
+	Elf_Scn *scn;
+	Elf *elf;
+	size_t shstrndx;
+	const char *rule_str;
+	ssize_t left;
+	int i;
+
+	const struct {
+		enum kabi_rule_type type;
+		const char *tag;
+	} rule_types[] = {
+		{
+			.type = KABI_RULE_TYPE_DECLONLY,
+			.tag = KABI_RULE_TAG_DECLONLY,
+		},
+		{
+			.type = KABI_RULE_TYPE_ENUMERATOR_IGNORE,
+			.tag = KABI_RULE_TAG_ENUMERATOR_IGNORE,
+		},
+		{
+			.type = KABI_RULE_TYPE_ENUMERATOR_VALUE,
+			.tag = KABI_RULE_TAG_ENUMERATOR_VALUE,
+		},
+	};
+
+	if (!stable)
+		return;
+
+	if (elf_version(EV_CURRENT) != EV_CURRENT)
+		error("elf_version failed: %s", elf_errmsg(-1));
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (!elf)
+		error("elf_begin failed: %s", elf_errmsg(-1));
+
+	if (elf_getshdrstrndx(elf, &shstrndx) < 0)
+		error("elf_getshdrstrndx failed: %s", elf_errmsg(-1));
+
+	scn = elf_nextscn(elf, NULL);
+
+	while (scn) {
+		const char *sname;
+
+		shdr = gelf_getshdr(scn, &shdr_mem);
+		if (!shdr)
+			error("gelf_getshdr failed: %s", elf_errmsg(-1));
+
+		sname = elf_strptr(elf, shstrndx, shdr->sh_name);
+		if (!sname)
+			error("elf_strptr failed: %s", elf_errmsg(-1));
+
+		if (!strcmp(sname, KABI_RULE_SECTION)) {
+			rule_data = elf_getdata(scn, NULL);
+			if (!rule_data)
+				error("elf_getdata failed: %s", elf_errmsg(-1));
+			break;
+		}
+
+		scn = elf_nextscn(elf, scn);
+	}
+
+	if (!rule_data) {
+		debug("kABI rules not found");
+		check(elf_end(elf));
+		return;
+	}
+
+	rule_str = rule_data->d_buf;
+	left = shdr->sh_size;
+
+	if (left < KABI_RULE_MIN_ENTRY_SIZE)
+		error("kABI rule section too small: %zd bytes", left);
+
+	if (rule_str[left - 1] != '\0')
+		error("kABI rules are not null-terminated");
+
+	while (left > KABI_RULE_MIN_ENTRY_SIZE) {
+		enum kabi_rule_type type = KABI_RULE_TYPE_UNKNOWN;
+		const char *field;
+		struct rule *rule;
+
+		/* version */
+		field = get_rule_field(&rule_str, &left);
+
+		if (strcmp(field, KABI_RULE_VERSION))
+			error("unsupported kABI rule version: '%s'", field);
+
+		/* type */
+		field = get_rule_field(&rule_str, &left);
+
+		for (i = 0; i < ARRAY_SIZE(rule_types); i++) {
+			if (!strcmp(field, rule_types[i].tag)) {
+				type = rule_types[i].type;
+				break;
+			}
+		}
+
+		if (type == KABI_RULE_TYPE_UNKNOWN)
+			error("unsupported kABI rule type: '%s'", field);
+
+		rule = xmalloc(sizeof(struct rule));
+
+		rule->type = type;
+		rule->target = xstrdup(get_rule_field(&rule_str, &left));
+		rule->value = xstrdup(get_rule_field(&rule_str, &left));
+
+		hash_add(rules, &rule->hash, rule_hash(rule));
+
+		debug("kABI rule: type: '%s', target: '%s', value: '%s'", field,
+		      rule->target, rule->value);
+	}
+
+	if (left > 0)
+		warn("unexpected data at the end of the kABI rules section");
+
+	check(elf_end(elf));
+}
+
+bool kabi_is_declonly(const char *fqn)
+{
+	struct rule *rule;
+
+	if (!stable)
+		return false;
+	if (!fqn || !*fqn)
+		return false;
+
+	hash_for_each_possible(rules, rule, hash,
+			       rule_values_hash(KABI_RULE_TYPE_DECLONLY, fqn)) {
+		if (rule->type == KABI_RULE_TYPE_DECLONLY &&
+		    !strcmp(fqn, rule->target))
+			return true;
+	}
+
+	return false;
+}
+
+static char *get_enumerator_target(const char *fqn, const char *field)
+{
+	char *target = NULL;
+
+	if (asprintf(&target, "%s %s", fqn, field) < 0)
+		error("asprintf failed for '%s %s'", fqn, field);
+
+	return target;
+}
+
+static unsigned long get_ulong_value(const char *value)
+{
+	unsigned long result = 0;
+	char *endptr = NULL;
+
+	errno = 0;
+	result = strtoul(value, &endptr, 10);
+
+	if (errno || *endptr)
+		error("invalid unsigned value '%s'", value);
+
+	return result;
+}
+
+bool kabi_is_enumerator_ignored(const char *fqn, const char *field)
+{
+	bool match = false;
+	struct rule *rule;
+	char *target;
+
+	if (!stable)
+		return false;
+	if (!fqn || !*fqn || !field || !*field)
+		return false;
+
+	target = get_enumerator_target(fqn, field);
+
+	hash_for_each_possible(
+		rules, rule, hash,
+		rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_IGNORE, target)) {
+		if (rule->type == KABI_RULE_TYPE_ENUMERATOR_IGNORE &&
+		    !strcmp(target, rule->target)) {
+			match = true;
+			break;
+		}
+	}
+
+	free(target);
+	return match;
+}
+
+bool kabi_get_enumerator_value(const char *fqn, const char *field,
+			       unsigned long *value)
+{
+	bool match = false;
+	struct rule *rule;
+	char *target;
+
+	if (!stable)
+		return false;
+	if (!fqn || !*fqn || !field || !*field)
+		return false;
+
+	target = get_enumerator_target(fqn, field);
+
+	hash_for_each_possible(rules, rule, hash,
+			       rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_VALUE,
+						target)) {
+		if (rule->type == KABI_RULE_TYPE_ENUMERATOR_VALUE &&
+		    !strcmp(target, rule->target)) {
+			*value = get_ulong_value(rule->value);
+			match = true;
+			break;
+		}
+	}
+
+	free(target);
+	return match;
+}
+
+void kabi_free(void)
+{
+	struct hlist_node *tmp;
+	struct rule *rule;
+
+	hash_for_each_safe(rules, rule, tmp, hash) {
+		free((void *)rule->target);
+		free((void *)rule->value);
+		free(rule);
+	}
+
+	hash_init(rules);
+}

From a93694188127a5f7ba3baa2f98b275ce388a5246 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:36 +0000
Subject: [PATCH 066/368] gendwarfksyms: Add support for reserved and ignored
 fields

Distributions that want to maintain a stable kABI need the ability
to make ABI compatible changes to kernel data structures without
affecting symbol versions, either because of LTS updates or backports.

With genksyms, developers would typically hide these changes from
version calculation with #ifndef __GENKSYMS__, which would result
in the symbol version not changing even though the actual type has
changed.  When we process precompiled object files, this isn't an
option.

Change union processing to recognize field name prefixes that allow
the user to ignore the union completely during symbol versioning with
a __kabi_ignored prefix in a field name, or to replace the type of a
placeholder field using a __kabi_reserved field name prefix.

For example, assume we want to add a new field to an existing
alignment hole in a data structure, and ignore the new field when
calculating symbol versions:

  struct struct1 {
    int a;
    /* a 4-byte alignment hole */
    unsigned long b;
  };

To add `int n` to the alignment hole, we can add a union that includes
a __kabi_ignored field that causes gendwarfksyms to ignore the entire
union:

  struct struct1 {
    int a;
    union {
      char __kabi_ignored_0;
      int n;
    };
    unsigned long b;
  };

With --stable, both structs produce the same symbol version.

Alternatively, when a distribution expects future modification to a
data structure, they can explicitly add reserved fields:

  struct struct2 {
    long a;
    long __kabi_reserved_0; /* reserved for future use */
  };

To take the field into use, we can again replace it with a union, with
one of the fields keeping the __kabi_reserved name prefix to indicate
the original type:

  struct struct2 {
    long a;
    union {
      long __kabi_reserved_0;
      struct {
          int b;
          int v;
      };
    };
  };

Here gendwarfksyms --stable replaces the union with the type of the
placeholder field when calculating versions.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c            | 248 ++++++++++++++++++++++-
 scripts/gendwarfksyms/examples/kabi.h    |  87 ++++++++
 scripts/gendwarfksyms/examples/kabi_ex.c |  16 ++
 scripts/gendwarfksyms/examples/kabi_ex.h | 199 ++++++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.h    |   9 +
 5 files changed, 558 insertions(+), 1 deletion(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 17f7e6b9a7ff2..746a89d9e3d42 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -3,10 +3,33 @@
  * Copyright (C) 2024 Google LLC
  */
 
+#include <assert.h>
 #include <inttypes.h>
 #include <stdarg.h>
 #include "gendwarfksyms.h"
 
+/* See get_union_kabi_status */
+#define KABI_PREFIX "__kabi_"
+#define KABI_PREFIX_LEN (sizeof(KABI_PREFIX) - 1)
+#define KABI_RESERVED_PREFIX "reserved"
+#define KABI_RESERVED_PREFIX_LEN (sizeof(KABI_RESERVED_PREFIX) - 1)
+#define KABI_RENAMED_PREFIX "renamed"
+#define KABI_RENAMED_PREFIX_LEN (sizeof(KABI_RENAMED_PREFIX) - 1)
+#define KABI_IGNORED_PREFIX "ignored"
+#define KABI_IGNORED_PREFIX_LEN (sizeof(KABI_IGNORED_PREFIX) - 1)
+
+static inline bool is_kabi_prefix(const char *name)
+{
+	return name && !strncmp(name, KABI_PREFIX, KABI_PREFIX_LEN);
+}
+
+enum kabi_status {
+	/* >0 to stop DIE processing */
+	KABI_NORMAL = 1,
+	KABI_RESERVED,
+	KABI_IGNORED,
+};
+
 static bool do_linebreak;
 static int indentation_level;
 
@@ -353,13 +376,23 @@ static void __process_list_type(struct state *state, struct die *cache,
 {
 	const char *name = get_name_attr(die);
 
+	if (stable) {
+		if (is_kabi_prefix(name))
+			name = NULL;
+		state->kabi.orig_name = NULL;
+	}
+
 	process_list_comma(state, cache);
 	process(cache, type);
 	process_type_attr(state, cache, die);
+
+	if (stable && state->kabi.orig_name)
+		name = state->kabi.orig_name;
 	if (name) {
 		process(cache, " ");
 		process(cache, name);
 	}
+
 	process_accessibility_attr(cache, die);
 	process_bit_size_attr(cache, die);
 	process_data_bit_offset_attr(cache, die);
@@ -486,11 +519,208 @@ static void process_variant_part_type(struct state *state, struct die *cache,
 	process(cache, "}");
 }
 
+static int get_kabi_status(Dwarf_Die *die, const char **suffix)
+{
+	const char *name = get_name_attr(die);
+
+	if (suffix)
+		*suffix = NULL;
+
+	if (is_kabi_prefix(name)) {
+		name += KABI_PREFIX_LEN;
+
+		if (!strncmp(name, KABI_RESERVED_PREFIX,
+			     KABI_RESERVED_PREFIX_LEN))
+			return KABI_RESERVED;
+		if (!strncmp(name, KABI_IGNORED_PREFIX,
+			     KABI_IGNORED_PREFIX_LEN))
+			return KABI_IGNORED;
+
+		if (!strncmp(name, KABI_RENAMED_PREFIX,
+			     KABI_RENAMED_PREFIX_LEN)) {
+			if (suffix) {
+				name += KABI_RENAMED_PREFIX_LEN;
+				*suffix = name;
+			}
+			return KABI_RESERVED;
+		}
+	}
+
+	return KABI_NORMAL;
+}
+
+static int check_struct_member_kabi_status(struct state *state,
+					   struct die *__unused, Dwarf_Die *die)
+{
+	int res;
+
+	assert(dwarf_tag(die) == DW_TAG_member_type);
+
+	/*
+	 * If the union member is a struct, expect the __kabi field to
+	 * be the first member of the structure, i.e.:
+	 *
+	 * union {
+	 * 	type new_member;
+	 * 	struct {
+	 * 		type __kabi_field;
+	 * 	};
+	 * };
+	 */
+	res = get_kabi_status(die, &state->kabi.orig_name);
+
+	if (res == KABI_RESERVED &&
+	    !get_ref_die_attr(die, DW_AT_type, &state->kabi.placeholder))
+		error("structure member missing a type?");
+
+	return res;
+}
+
+static int check_union_member_kabi_status(struct state *state,
+					  struct die *__unused, Dwarf_Die *die)
+{
+	Dwarf_Die type;
+	int res;
+
+	assert(dwarf_tag(die) == DW_TAG_member_type);
+
+	if (!get_ref_die_attr(die, DW_AT_type, &type))
+		error("union member missing a type?");
+
+	/*
+	 * We expect a union with two members. Check if either of them
+	 * has a __kabi name prefix, i.e.:
+	 *
+	 * union {
+	 * 	...
+	 * 	type memberN; // <- type, N = {0,1}
+	 *	...
+	 * };
+	 *
+	 * The member can also be a structure type, in which case we'll
+	 * check the first structure member.
+	 *
+	 * In any case, stop processing after we've seen two members.
+	 */
+	res = get_kabi_status(die, &state->kabi.orig_name);
+
+	if (res == KABI_RESERVED)
+		state->kabi.placeholder = type;
+	if (res != KABI_NORMAL)
+		return res;
+
+	if (dwarf_tag(&type) == DW_TAG_structure_type)
+		res = checkp(process_die_container(
+			state, NULL, &type, check_struct_member_kabi_status,
+			match_member_type));
+
+	if (res <= KABI_NORMAL && ++state->kabi.members < 2)
+		return 0; /* Continue */
+
+	return res;
+}
+
+static int get_union_kabi_status(Dwarf_Die *die, Dwarf_Die *placeholder,
+				 const char **orig_name)
+{
+	struct state state;
+	int res;
+
+	if (!stable)
+		return KABI_NORMAL;
+
+	/*
+	 * To maintain a stable kABI, distributions may choose to reserve
+	 * space in structs for later use by adding placeholder members,
+	 * for example:
+	 *
+	 * struct s {
+	 * 	u32 a;
+	 *	// an 8-byte placeholder for future use
+	 * 	u64 __kabi_reserved_0;
+	 * };
+	 *
+	 * When the reserved member is taken into use, the type change
+	 * would normally cause the symbol version to change as well, but
+	 * if the replacement uses the following convention, gendwarfksyms
+	 * continues to use the placeholder type for versioning instead,
+	 * thus maintaining the same symbol version:
+	 *
+	 * struct s {
+	 * 	u32 a;
+	 *	union {
+	 * 		// placeholder replaced with a new member `b`
+	 * 		struct t b;
+	 * 		struct {
+	 * 			// the placeholder type that is still
+	 *			// used for versioning
+	 * 			u64 __kabi_reserved_0;
+	 * 		};
+	 * 	};
+	 * };
+	 *
+	 * I.e., as long as the replaced member is in a union, and the
+	 * placeholder has a __kabi_reserved name prefix, we'll continue
+	 * to use the placeholder type (here u64) for version calculation
+	 * instead of the union type.
+	 *
+	 * It's also possible to ignore new members from versioning if
+	 * they've been added to alignment holes, for example, by
+	 * including them in a union with another member that uses the
+	 * __kabi_ignored name prefix:
+	 *
+	 * struct s {
+	 * 	u32 a;
+	 *	// an alignment hole is used to add `n`
+	 * 	union {
+	 * 		u32 n;
+	 *		// hide the entire union member from versioning
+	 * 		u8 __kabi_ignored_0;
+	 * 	};
+	 * 	u64 b;
+	 * };
+	 *
+	 * Note that the user of this feature is responsible for ensuring
+	 * that the structure actually remains ABI compatible.
+	 */
+	memset(&state.kabi, 0, sizeof(struct kabi_state));
+
+	res = checkp(process_die_container(&state, NULL, die,
+					   check_union_member_kabi_status,
+					   match_member_type));
+
+	if (res == KABI_RESERVED) {
+		if (placeholder)
+			*placeholder = state.kabi.placeholder;
+		if (orig_name)
+			*orig_name = state.kabi.orig_name;
+	}
+
+	return res;
+}
+
+static bool is_kabi_ignored(Dwarf_Die *die)
+{
+	Dwarf_Die type;
+
+	if (!stable)
+		return false;
+
+	if (!get_ref_die_attr(die, DW_AT_type, &type))
+		error("member missing a type?");
+
+	return dwarf_tag(&type) == DW_TAG_union_type &&
+	       checkp(get_union_kabi_status(&type, NULL, NULL)) == KABI_IGNORED;
+}
+
 static int ___process_structure_type(struct state *state, struct die *cache,
 				     Dwarf_Die *die)
 {
 	switch (dwarf_tag(die)) {
 	case DW_TAG_member:
+		if (is_kabi_ignored(die))
+			return 0;
+		return check(process_type(state, cache, die));
 	case DW_TAG_variant_part:
 		return check(process_type(state, cache, die));
 	case DW_TAG_class_type:
@@ -547,7 +777,23 @@ static void __process_structure_type(struct state *state, struct die *cache,
 
 DEFINE_PROCESS_STRUCTURE_TYPE(class)
 DEFINE_PROCESS_STRUCTURE_TYPE(structure)
-DEFINE_PROCESS_STRUCTURE_TYPE(union)
+
+static void process_union_type(struct state *state, struct die *cache,
+			       Dwarf_Die *die)
+{
+	Dwarf_Die placeholder;
+
+	int res = checkp(get_union_kabi_status(die, &placeholder,
+					       &state->kabi.orig_name));
+
+	if (res == KABI_RESERVED)
+		check(process_type(state, cache, &placeholder));
+	if (res > KABI_NORMAL)
+		return;
+
+	__process_structure_type(state, cache, die, "union_type",
+				 ___process_structure_type, match_all);
+}
 
 static void process_enumerator_type(struct state *state, struct die *cache,
 				    Dwarf_Die *die)
diff --git a/scripts/gendwarfksyms/examples/kabi.h b/scripts/gendwarfksyms/examples/kabi.h
index fcd0300e5b58f..97a5669b083d7 100644
--- a/scripts/gendwarfksyms/examples/kabi.h
+++ b/scripts/gendwarfksyms/examples/kabi.h
@@ -43,6 +43,28 @@
 		__section(".discard.gendwarfksyms.kabi_rules") =     \
 			"1\0" #hint "\0" #target "\0" #value
 
+#define __KABI_NORMAL_SIZE_ALIGN(_orig, _new)                                             \
+	union {                                                                           \
+		_Static_assert(                                                           \
+			sizeof(struct { _new; }) <= sizeof(struct { _orig; }),            \
+			__FILE__ ":" __stringify(__LINE__) ": " __stringify(              \
+				_new) " is larger than " __stringify(_orig));             \
+		_Static_assert(                                                           \
+			__alignof__(struct { _new; }) <=                                  \
+				__alignof__(struct { _orig; }),                           \
+			__FILE__ ":" __stringify(__LINE__) ": " __stringify(              \
+				_orig) " is not aligned the same as " __stringify(_new)); \
+	}
+
+#define __KABI_REPLACE(_orig, _new)                    \
+	union {                                        \
+		_new;                                  \
+		struct {                               \
+			_orig;                         \
+		};                                     \
+		__KABI_NORMAL_SIZE_ALIGN(_orig, _new); \
+	}
+
 /*
  * KABI_DECLONLY(fqn)
  *   Treat the struct/union/enum fqn as a declaration, i.e. even if
@@ -67,4 +89,69 @@
 #define KABI_ENUMERATOR_VALUE(fqn, field, value) \
 	__KABI_RULE(enumerator_value, fqn field, value)
 
+/*
+ * KABI_RESERVE
+ *   Reserve some "padding" in a structure for use by LTS backports.
+ *   This is normally placed at the end of a structure.
+ *   number: the "number" of the padding variable in the structure.  Start with
+ *   1 and go up.
+ */
+#define KABI_RESERVE(n) unsigned long __kabi_reserved##n
+
+/*
+ * KABI_RESERVE_ARRAY
+ *   Same as KABI_RESERVE but allocates an array with the specified
+ *   size in bytes.
+ */
+#define KABI_RESERVE_ARRAY(n, s) \
+	unsigned char __aligned(8) __kabi_reserved##n[s]
+
+/*
+ * KABI_IGNORE
+ *   Add a new field that's ignored in versioning.
+ */
+#define KABI_IGNORE(n, _new)                     \
+	union {                                  \
+		_new;                            \
+		unsigned char __kabi_ignored##n; \
+	}
+
+/*
+ * KABI_REPLACE
+ *   Replace a field with a compatible new field.
+ */
+#define KABI_REPLACE(_oldtype, _oldname, _new) \
+	__KABI_REPLACE(_oldtype __kabi_renamed##_oldname, struct { _new; })
+
+/*
+ * KABI_USE(number, _new)
+ *   Use a previous padding entry that was defined with KABI_RESERVE
+ *   number: the previous "number" of the padding variable
+ *   _new: the variable to use now instead of the padding variable
+ */
+#define KABI_USE(number, _new) __KABI_REPLACE(KABI_RESERVE(number), _new)
+
+/*
+ * KABI_USE2(number, _new1, _new2)
+ *   Use a previous padding entry that was defined with KABI_RESERVE for
+ *   two new variables that fit into 64 bits.  This is good for when you do not
+ *   want to "burn" a 64bit padding variable for a smaller variable size if not
+ *   needed.
+ */
+#define KABI_USE2(number, _new1, _new2)        \
+	__KABI_REPLACE(                        \
+		KABI_RESERVE(number), struct { \
+			_new1;                 \
+			_new2;                 \
+		})
+/*
+ * KABI_USE_ARRAY(number, bytes, _new)
+ *   Use a previous padding entry that was defined with KABI_RESERVE_ARRAY
+ *   number: the previous "number" of the padding variable
+ *   bytes: the size in bytes reserved for the array
+ *   _new: the variable to use now instead of the padding variable
+ */
+#define KABI_USE_ARRAY(number, bytes, _new) \
+	__KABI_REPLACE(KABI_RESERVE_ARRAY(number, bytes), _new)
+
 #endif /* __KABI_H__ */
diff --git a/scripts/gendwarfksyms/examples/kabi_ex.c b/scripts/gendwarfksyms/examples/kabi_ex.c
index 799552ea6679b..0b7ffd830541d 100644
--- a/scripts/gendwarfksyms/examples/kabi_ex.c
+++ b/scripts/gendwarfksyms/examples/kabi_ex.c
@@ -12,3 +12,19 @@
 
 struct s e0;
 enum e e1;
+
+struct ex0a ex0a;
+struct ex0b ex0b;
+struct ex0c ex0c;
+
+struct ex1a ex1a;
+struct ex1b ex1b;
+struct ex1c ex1c;
+
+struct ex2a ex2a;
+struct ex2b ex2b;
+struct ex2c ex2c;
+
+struct ex3a ex3a;
+struct ex3b ex3b;
+struct ex3c ex3c;
diff --git a/scripts/gendwarfksyms/examples/kabi_ex.h b/scripts/gendwarfksyms/examples/kabi_ex.h
index fca1e07c78e2b..1736e0f652081 100644
--- a/scripts/gendwarfksyms/examples/kabi_ex.h
+++ b/scripts/gendwarfksyms/examples/kabi_ex.h
@@ -59,6 +59,205 @@ KABI_ENUMERATOR_VALUE(e, D, 123456789);
  * STABLE-NEXT:   enumerator A = 0 ,
  * STABLE-NEXT:   enumerator D = 123456789
  * STABLE-NEXT: } byte_size(4)
+*/
+
+/*
+ * Example: Reserved fields
+ */
+struct ex0a {
+	int a;
+	KABI_RESERVE(0);
+	KABI_RESERVE(1);
+};
+
+/*
+ * STABLE:      variable structure_type ex0a {
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) a data_member_location(0) ,
+ * STABLE-NEXT:   member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) data_member_location(8) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16)
+ * STABLE-NEXT: } byte_size(24)
+ */
+
+struct ex0b {
+	int a;
+	KABI_RESERVE(0);
+	KABI_USE2(1, int b, int c);
+};
+
+/*
+ * STABLE:      variable structure_type ex0b {
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) a data_member_location(0) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16)
+ * STABLE-NEXT: } byte_size(24)
+ */
+
+struct ex0c {
+	int a;
+	KABI_USE(0, void *p);
+	KABI_USE2(1, int b, int c);
+};
+
+/*
+ * STABLE:      variable structure_type ex0c {
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) a data_member_location(0) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16)
+ * STABLE-NEXT: } byte_size(24)
+ */
+
+/*
+ * Example: A reserved array
+ */
+
+struct ex1a {
+	unsigned int a;
+	KABI_RESERVE_ARRAY(0, 64);
+};
+
+/*
+ * STABLE:      variable structure_type ex1a {
+ * STABLE-NEXT:   member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) ,
+ * STABLE-NEXT:   member array_type[64] {
+ * STABLE-NEXT:     base_type unsigned char byte_size(1) encoding(8)
+ * STABLE-NEXT:   } data_member_location(8)
+ * STABLE-NEXT: } byte_size(72)
+ */
+
+struct ex1b {
+	unsigned int a;
+	KABI_USE_ARRAY(
+		0, 64, struct {
+			void *p;
+			KABI_RESERVE_ARRAY(1, 56);
+		});
+};
+
+/*
+ * STABLE:      variable structure_type ex1b {
+ * STABLE-NEXT:   member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) ,
+ * STABLE-NEXT:   member array_type[64] {
+ * STABLE-NEXT:     base_type unsigned char byte_size(1) encoding(8)
+ * STABLE-NEXT:   } data_member_location(8)
+ * STABLE-NEXT: } byte_size(72)
+ */
+
+struct ex1c {
+	unsigned int a;
+	KABI_USE_ARRAY(0, 64, void *p[8]);
+};
+
+/*
+ * STABLE:      variable structure_type ex1c {
+ * STABLE-NEXT:   member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) ,
+ * STABLE-NEXT:   member array_type[64] {
+ * STABLE-NEXT:     base_type unsigned char byte_size(1) encoding(8)
+ * STABLE-NEXT:   } data_member_location(8)
+ * STABLE-NEXT: } byte_size(72)
+ */
+
+/*
+ * Example: An ignored field added to an alignment hole
+ */
+
+struct ex2a {
+	int a;
+	unsigned long b;
+	int c;
+	unsigned long d;
+};
+
+/*
+ * STABLE:      variable structure_type ex2a {
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) a data_member_location(0) ,
+ * STABLE-NEXT:   member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) b data_member_location(8)
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) c data_member_location(16) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24)
+ * STABLE-NEXT: } byte_size(32)
+ */
+
+struct ex2b {
+	int a;
+	KABI_IGNORE(0, unsigned int n);
+	unsigned long b;
+	int c;
+	unsigned long d;
+};
+
+_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2b), "ex2a size doesn't match ex2b");
+
+/*
+ * STABLE:      variable structure_type ex2b {
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) a data_member_location(0) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) b data_member_location(8)
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) c data_member_location(16) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24)
+ * STABLE-NEXT: } byte_size(32)
+ */
+
+struct ex2c {
+	int a;
+	KABI_IGNORE(0, unsigned int n);
+	unsigned long b;
+	int c;
+	KABI_IGNORE(1, unsigned int m);
+	unsigned long d;
+};
+
+_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2c), "ex2a size doesn't match ex2c");
+
+/*
+ * STABLE:      variable structure_type ex2c {
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) a data_member_location(0) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) b data_member_location(8)
+ * STABLE-NEXT:   member base_type int byte_size(4) encoding(5) c data_member_location(16) ,
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24)
+ * STABLE-NEXT: } byte_size(32)
+ */
+
+
+/*
+ * Example: A replaced field
+ */
+
+struct ex3a {
+	unsigned long a;
+	unsigned long unused;
+};
+
+/*
+ * STABLE:      variable structure_type ex3a {
+ * STABLE-NEXT:   member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) a data_member_location(0)
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8)
+ * STABLE-NEXT: } byte_size(16)
+ */
+
+struct ex3b {
+	unsigned long a;
+	KABI_REPLACE(unsigned long, unused, unsigned long renamed);
+};
+
+_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3b), "ex3a size doesn't match ex3b");
+
+/*
+ * STABLE:      variable structure_type ex3b {
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0)
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8)
+ * STABLE-NEXT: } byte_size(16)
+ */
+
+struct ex3c {
+	unsigned long a;
+	KABI_REPLACE(unsigned long, unused, long replaced);
+};
+
+_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3c), "ex3a size doesn't match ex3c");
+
+/*
+ * STABLE:      variable structure_type ex3c {
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0)
+ * STABLE-NEXT:   member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8)
+ * STABLE-NEXT: } byte_size(16)
  */
 
 #endif /* __KABI_EX_H__ */
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index c0207ca10e198..fe49730fe623e 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -236,6 +236,12 @@ struct expansion_state {
 	const char *current_fqn;
 };
 
+struct kabi_state {
+	int members;
+	Dwarf_Die placeholder;
+	const char *orig_name;
+};
+
 struct state {
 	struct symbol *sym;
 	Dwarf_Die die;
@@ -246,6 +252,9 @@ struct state {
 	/* Structure expansion */
 	struct expansion_state expand;
 	struct cache expansion_cache;
+
+	/* Reserved or ignored members */
+	struct kabi_state kabi;
 };
 
 typedef int (*die_callback_t)(struct state *state, struct die *cache,

From fa624569b70d8015775592ae7e2c514009367541 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:37 +0000
Subject: [PATCH 067/368] gendwarfksyms: Add support for symbol type pointers

The compiler may choose not to emit type information in DWARF for
external symbols. Clang, for example, does this for symbols not
defined in the current TU.

To provide a way to work around this issue, add support for
__gendwarfksyms_ptr_<symbol> pointers that force the compiler to emit
the necessary type information in DWARF also for the missing symbols.

Example usage:

  #define GENDWARFKSYMS_PTR(sym) \
      static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
          __section(".discard.gendwarfksyms") = &sym;

  extern int external_symbol(void);
  GENDWARFKSYMS_PTR(external_symbol);

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gendwarfksyms/dwarf.c              | 55 +++++++++++++++++++++-
 scripts/gendwarfksyms/examples/symbolptr.c | 33 +++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.h      |  7 +++
 scripts/gendwarfksyms/symbols.c            | 27 +++++++++++
 4 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gendwarfksyms/examples/symbolptr.c

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 746a89d9e3d42..534d9aa7c1147 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -1061,6 +1061,31 @@ static void process_variable(struct state *state, Dwarf_Die *die)
 	process_symbol(state, die, __process_variable);
 }
 
+static void save_symbol_ptr(struct state *state)
+{
+	Dwarf_Die ptr_type;
+	Dwarf_Die type;
+
+	if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) ||
+	    dwarf_tag(&ptr_type) != DW_TAG_pointer_type)
+		error("%s must be a pointer type!",
+		      get_symbol_name(&state->die));
+
+	if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type))
+		error("%s pointer missing a type attribute?",
+		      get_symbol_name(&state->die));
+
+	/*
+	 * Save the symbol pointer DIE in case the actual symbol is
+	 * missing from the DWARF. Clang, for example, intentionally
+	 * omits external symbols from the debugging information.
+	 */
+	if (dwarf_tag(&type) == DW_TAG_subroutine_type)
+		symbol_set_ptr(state->sym, &type);
+	else
+		symbol_set_ptr(state->sym, &ptr_type);
+}
+
 static int process_exported_symbols(struct state *unused, struct die *cache,
 				    Dwarf_Die *die)
 {
@@ -1084,7 +1109,9 @@ static int process_exported_symbols(struct state *unused, struct die *cache,
 
 		state_init(&state);
 
-		if (tag == DW_TAG_subprogram)
+		if (is_symbol_ptr(get_symbol_name(&state.die)))
+			save_symbol_ptr(&state);
+		else if (tag == DW_TAG_subprogram)
 			process_subprogram(&state, &state.die);
 		else
 			process_variable(&state, &state.die);
@@ -1097,10 +1124,36 @@ static int process_exported_symbols(struct state *unused, struct die *cache,
 	}
 }
 
+static void process_symbol_ptr(struct symbol *sym, void *arg)
+{
+	struct state state;
+	Dwarf *dwarf = arg;
+
+	if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr)
+		return;
+
+	debug("%s", sym->name);
+	state_init(&state);
+	state.sym = sym;
+
+	if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die))
+		error("dwarf_die_addr_die failed for symbol ptr: '%s'",
+		      sym->name);
+
+	if (dwarf_tag(&state.die) == DW_TAG_subroutine_type)
+		process_subprogram(&state, &state.die);
+	else
+		process_variable(&state, &state.die);
+
+	cache_free(&state.expansion_cache);
+}
+
 void process_cu(Dwarf_Die *cudie)
 {
 	check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
 				    match_all));
 
+	symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu));
+
 	cache_free(&srcfile_cache);
 }
diff --git a/scripts/gendwarfksyms/examples/symbolptr.c b/scripts/gendwarfksyms/examples/symbolptr.c
new file mode 100644
index 0000000000000..88bc1bd60da86
--- /dev/null
+++ b/scripts/gendwarfksyms/examples/symbolptr.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Example for symbol pointers. When compiled with Clang, gendwarfksyms
+ * uses a symbol pointer for `f`.
+ *
+ * $ clang -g -c examples/symbolptr.c -o examples/symbolptr.o
+ * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o
+ */
+
+/* Kernel macros for userspace testing. */
+#ifndef __used
+#define __used __attribute__((__used__))
+#endif
+#ifndef __section
+#define __section(section) __attribute__((__section__(section)))
+#endif
+
+#define __GENDWARFKSYMS_EXPORT(sym)				\
+	static typeof(sym) *__gendwarfksyms_ptr_##sym __used	\
+		__section(".discard.gendwarfksyms") = &sym;
+
+extern void f(unsigned int arg);
+void g(int *arg);
+void g(int *arg) {}
+
+struct s;
+extern struct s *p;
+
+__GENDWARFKSYMS_EXPORT(f);
+__GENDWARFKSYMS_EXPORT(g);
+__GENDWARFKSYMS_EXPORT(p);
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index fe49730fe623e..197a1a8123c6c 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -89,6 +89,10 @@ extern int symtypes;
  * symbols.c
  */
 
+/* See symbols.c:is_symbol_ptr */
+#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_"
+#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1)
+
 static inline unsigned int addr_hash(uintptr_t addr)
 {
 	return hash_ptr((const void *)addr);
@@ -112,14 +116,17 @@ struct symbol {
 	struct hlist_node name_hash;
 	enum symbol_state state;
 	uintptr_t die_addr;
+	uintptr_t ptr_die_addr;
 	unsigned long crc;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
 
+bool is_symbol_ptr(const char *name);
 void symbol_read_exports(FILE *file);
 void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
+void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr);
 void symbol_set_die(struct symbol *sym, Dwarf_Die *die);
 void symbol_set_crc(struct symbol *sym, unsigned long crc);
 void symbol_for_each(symbol_callback_t func, void *arg);
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index 4c499ba6c86de..327f87389c343 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -39,6 +39,20 @@ static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func,
 	return processed;
 }
 
+/*
+ * For symbols without debugging information (e.g. symbols defined in other
+ * TUs), we also match __gendwarfksyms_ptr_<symbol_name> symbols, which the
+ * kernel uses to ensure type information is present in the TU that exports
+ * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the
+ * exported symbol, e.g.:
+ *
+ *   typeof(symname) *__gendwarfksyms_ptr_symname = &symname;
+ */
+bool is_symbol_ptr(const char *name)
+{
+	return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN);
+}
+
 static unsigned int for_each(const char *name, symbol_callback_t func,
 			     void *data)
 {
@@ -47,6 +61,8 @@ static unsigned int for_each(const char *name, symbol_callback_t func,
 
 	if (!name || !*name)
 		return 0;
+	if (is_symbol_ptr(name))
+		name += SYMBOL_PTR_PREFIX_LEN;
 
 	hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
 				    hash_str(name)) {
@@ -84,6 +100,17 @@ void symbol_set_crc(struct symbol *sym, unsigned long crc)
 		error("no matching symbols: '%s'", sym->name);
 }
 
+static void set_ptr(struct symbol *sym, void *data)
+{
+	sym->ptr_die_addr = (uintptr_t)((Dwarf_Die *)data)->addr;
+}
+
+void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr)
+{
+	if (for_each(sym->name, set_ptr, ptr) == 0)
+		error("no matching symbols: '%s'", sym->name);
+}
+
 static void set_die(struct symbol *sym, void *data)
 {
 	sym->die_addr = (uintptr_t)((Dwarf_Die *)data)->addr;

From d7476f24c9aa93d02ef3fd8d587a6114387b7667 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:38 +0000
Subject: [PATCH 068/368] export: Add __gendwarfksyms_ptr_ references to
 exported symbols

With gendwarfksyms, we need each TU where the EXPORT_SYMBOL() macro
is used to also contain DWARF type information for the symbols it
exports.  However, as a TU can also export external symbols and
compilers may choose not to emit debugging information for symbols not
defined in the current TU, the missing types will result in missing
symbol versions. Stand-alone assembly code also doesn't contain type
information for exported symbols, so we need to compile a temporary
object file with asm-prototypes.h instead, and similarly need to
ensure the DWARF in the temporary object file contains the necessary
types.

To always emit type information for external exports, add explicit
__gendwarfksyms_ptr_<symbol> references to them in EXPORT_SYMBOL().
gendwarfksyms will use the type information for __gendwarfksyms_ptr_*
if needed. Discard the pointers from the final binary to avoid further
bloat.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/linux/export.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/linux/export.h b/include/linux/export.h
index 2633df4d31e62..a8c23d945634b 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -52,9 +52,24 @@
 
 #else
 
+#ifdef CONFIG_GENDWARFKSYMS
+/*
+ * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging
+ * information for all exported symbols, including those defined in
+ * different TUs, by adding a __gendwarfksyms_ptr_<symbol> pointer
+ * that's discarded during the final link.
+ */
+#define __GENDWARFKSYMS_EXPORT(sym)				\
+	static typeof(sym) *__gendwarfksyms_ptr_##sym __used	\
+		__section(".discard.gendwarfksyms") = &sym;
+#else
+#define __GENDWARFKSYMS_EXPORT(sym)
+#endif
+
 #define __EXPORT_SYMBOL(sym, license, ns)			\
 	extern typeof(sym) sym;					\
 	__ADDRESSABLE(sym)					\
+	__GENDWARFKSYMS_EXPORT(sym)				\
 	asm(__stringify(___EXPORT_SYMBOL(sym, license, ns)))
 
 #endif

From 9c3681f9b9fd12cdbc4a542df599f1837512f3d5 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:39 +0000
Subject: [PATCH 069/368] kbuild: Add gendwarfksyms as an alternative to
 genksyms

When MODVERSIONS is enabled, allow selecting gendwarfksyms as the
implementation, but default to genksyms.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/module/Kconfig  | 22 ++++++++++++++++++++++
 scripts/Makefile       |  2 +-
 scripts/Makefile.build | 35 +++++++++++++++++++++++++++++------
 3 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 4637f063d0fcb..d443fc504ffca 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -169,6 +169,22 @@ config MODVERSIONS
 	  make them incompatible with the kernel you are running.  If
 	  unsure, say N.
 
+choice
+	prompt "Module versioning implementation"
+	depends on MODVERSIONS
+	help
+	  Select the tool used to calculate symbol versions for modules.
+
+	  If unsure, select GENKSYMS.
+
+config GENKSYMS
+	bool "genksyms (from source code)"
+	help
+	  Calculate symbol versions from pre-processed source code using
+	  genksyms.
+
+	  If unsure, say Y.
+
 config GENDWARFKSYMS
 	bool "gendwarfksyms (from debugging information)"
 	depends on DEBUG_INFO
@@ -176,6 +192,12 @@ config GENDWARFKSYMS
 	depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
 	# Requires ELF object files.
 	depends on !LTO
+	help
+	  Calculate symbol versions from DWARF debugging information using
+	  gendwarfksyms. Requires DEBUG_INFO to be enabled.
+
+	  If unsure, say N.
+endchoice
 
 config ASM_MODVERSIONS
 	bool
diff --git a/scripts/Makefile b/scripts/Makefile
index d7fec46d38c00..8533f4498885e 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -53,7 +53,7 @@ hostprogs += unifdef
 targets += module.lds
 
 subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
-subdir-$(CONFIG_MODVERSIONS) += genksyms
+subdir-$(CONFIG_GENKSYMS) += genksyms
 subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
 subdir-$(CONFIG_SECURITY_SELINUX) += selinux
 subdir-$(CONFIG_SECURITY_IPE) += ipe
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index c16e4cf54d770..81d9dacad03c7 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -107,13 +107,24 @@ cmd_cpp_i_c       = $(CPP) $(c_flags) -o $@ $<
 $(obj)/%.i: $(obj)/%.c FORCE
 	$(call if_changed_dep,cpp_i_c)
 
+getexportsymbols = $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/$(1)/p'
+
+gendwarfksyms = $(objtree)/scripts/gendwarfksyms/gendwarfksyms	\
+	$(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes))	\
+	$(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable)
+
 genksyms = $(objtree)/scripts/genksyms/genksyms		\
 	$(if $(KBUILD_SYMTYPES), -T $(@:.o=.symtypes))	\
 	$(if $(KBUILD_PRESERVE), -p)			\
 	$(addprefix -r , $(wildcard $(@:.o=.symref)))
 
 # These mirror gensymtypes_S and co below, keep them in synch.
+ifdef CONFIG_GENDWARFKSYMS
+cmd_gensymtypes_c = $(if $(skip_gendwarfksyms),,	\
+	$(call getexportsymbols,\1) | $(gendwarfksyms) $@)
+else
 cmd_gensymtypes_c = $(CPP) -D__GENKSYMS__ $(c_flags) $< | $(genksyms)
+endif # CONFIG_GENDWARFKSYMS
 
 # LLVM assembly
 # Generate .ll files from .c
@@ -286,14 +297,26 @@ $(obj)/%.rs: $(obj)/%.rs.S FORCE
 # This is convoluted. The .S file must first be preprocessed to run guards and
 # expand names, then the resulting exports must be constructed into plain
 # EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed
-# to make the genksyms input.
+# to make the genksyms input or compiled into an object for gendwarfksyms.
 #
 # These mirror gensymtypes_c and co above, keep them in synch.
-cmd_gensymtypes_S =                                                         \
-   { echo "\#include <linux/kernel.h>" ;                                    \
-     echo "\#include <asm/asm-prototypes.h>" ;                              \
-     $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/EXPORT_SYMBOL(\1);/p' ; } | \
-    $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms)
+getasmexports =								\
+   { echo "\#include <linux/kernel.h>" ;				\
+     echo "\#include <linux/string.h>" ;				\
+     echo "\#include <asm/asm-prototypes.h>" ;				\
+     $(call getexportsymbols,EXPORT_SYMBOL(\1);) ; }
+
+ifdef CONFIG_GENDWARFKSYMS
+cmd_gensymtypes_S =							\
+	$(getasmexports) |						\
+	$(CC) $(c_flags) -c -o $(@:.o=.gendwarfksyms.o) -xc -;		\
+	$(call getexportsymbols,\1) |					\
+	$(gendwarfksyms) $(@:.o=.gendwarfksyms.o)
+else
+cmd_gensymtypes_S =							\
+	$(getasmexports) |						\
+	$(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms)
+endif # CONFIG_GENDWARFKSYMS
 
 quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@
 cmd_cpp_s_S       = $(CPP) $(a_flags) -o $@ $<

From 8c6d7b417f0fe69d7e29501db801838a54c6764b Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 20:45:40 +0000
Subject: [PATCH 070/368] Documentation/kbuild: Add DWARF module versioning

Add documentation for gendwarfksyms changes, and the kABI stability
features that can be useful for distributions even though they're not
used in mainline kernels.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/kbuild/gendwarfksyms.rst | 308 +++++++++++++++++++++++++
 Documentation/kbuild/index.rst         |   1 +
 2 files changed, 309 insertions(+)
 create mode 100644 Documentation/kbuild/gendwarfksyms.rst

diff --git a/Documentation/kbuild/gendwarfksyms.rst b/Documentation/kbuild/gendwarfksyms.rst
new file mode 100644
index 0000000000000..e4beaae7e456c
--- /dev/null
+++ b/Documentation/kbuild/gendwarfksyms.rst
@@ -0,0 +1,308 @@
+=======================
+DWARF module versioning
+=======================
+
+1. Introduction
+===============
+
+When CONFIG_MODVERSIONS is enabled, symbol versions for modules
+are typically calculated from preprocessed source code using the
+**genksyms** tool.  However, this is incompatible with languages such
+as Rust, where the source code has insufficient information about
+the resulting ABI. With CONFIG_GENDWARFKSYMS (and CONFIG_DEBUG_INFO)
+selected, **gendwarfksyms** is used instead to calculate symbol versions
+from the DWARF debugging information, which contains the necessary
+details about the final module ABI.
+
+1.1. Usage
+==========
+
+gendwarfksyms accepts a list of object files on the command line, and a
+list of symbol names (one per line) in standard input::
+
+        Usage: gendwarfksyms [options] elf-object-file ... < symbol-list
+
+        Options:
+          -d, --debug          Print debugging information
+              --dump-dies      Dump DWARF DIE contents
+              --dump-die-map   Print debugging information about die_map changes
+              --dump-types     Dump type strings
+              --dump-versions  Dump expanded type strings used for symbol versions
+          -s, --stable         Support kABI stability features
+          -T, --symtypes file  Write a symtypes file
+          -h, --help           Print this message
+
+
+2. Type information availability
+================================
+
+While symbols are typically exported in the same translation unit (TU)
+where they're defined, it's also perfectly fine for a TU to export
+external symbols. For example, this is done when calculating symbol
+versions for exports in stand-alone assembly code.
+
+To ensure the compiler emits the necessary DWARF type information in the
+TU where symbols are actually exported, gendwarfksyms adds a pointer
+to exported symbols in the `EXPORT_SYMBOL()` macro using the following
+macro::
+
+        #define __GENDWARFKSYMS_EXPORT(sym)                             \
+                static typeof(sym) *__gendwarfksyms_ptr_##sym __used    \
+                        __section(".discard.gendwarfksyms") = &sym;
+
+
+When a symbol pointer is found in DWARF, gendwarfksyms can use its
+type for calculating symbol versions even if the symbol is defined
+elsewhere. The name of the symbol pointer is expected to start with
+`__gendwarfksyms_ptr_`, followed by the name of the exported symbol.
+
+3. Symtypes output format
+=========================
+
+Similarly to genksyms, gendwarfksyms supports writing a symtypes
+file for each processed object that contains types for exported
+symbols and each referenced type that was used in calculating symbol
+versions. These files can be useful when trying to determine what
+exactly caused symbol versions to change between builds. To generate
+symtypes files during a kernel build, set `KBUILD_SYMTYPES=1`.
+
+Matching the existing format, the first column of each line contains
+either a type reference or a symbol name. Type references have a
+one-letter prefix followed by "#" and the name of the type. Four
+reference types are supported::
+
+        e#<type> = enum
+        s#<type> = struct
+        t#<type> = typedef
+        u#<type> = union
+
+Type names with spaces in them are wrapped in single quotes, e.g.::
+
+        s#'core::result::Result<u8, core::num::error::ParseIntError>'
+
+The rest of the line contains a type string. Unlike genksyms, which
+produces C-style type strings, gendwarfksyms uses the same simple parsed
+DWARF format produced by **--dump-dies**, but with type references
+instead of fully expanded strings.
+
+4. Maintaining a stable kABI
+============================
+
+Distribution maintainers often need the ability to make ABI compatible
+changes to kernel data structures due to LTS updates or backports. Using
+the traditional `#ifndef __GENKSYMS__` to hide these changes from symbol
+versioning won't work when processing object files. To support this
+use case, gendwarfksyms provides kABI stability features designed to
+hide changes that won't affect the ABI when calculating versions. These
+features are all gated behind the **--stable** command line flag and are
+not used in the mainline kernel. To use stable features during a kernel
+build, set `KBUILD_GENDWARFKSYMS_STABLE=1`.
+
+Examples for using these features are provided in the
+**scripts/gendwarfksyms/examples** directory, including helper macros
+for source code annotation. Note that as these features are only used to
+transform the inputs for symbol versioning, the user is responsible for
+ensuring that their changes actually won't break the ABI.
+
+4.1. kABI rules
+===============
+
+kABI rules allow distributions to fine-tune certain parts
+of gendwarfksyms output and thus control how symbol
+versions are calculated. These rules are defined in the
+`.discard.gendwarfksyms.kabi_rules` section of the object file and
+consist of simple null-terminated strings with the following structure::
+
+	version\0type\0target\0value\0
+
+This string sequence is repeated as many times as needed to express all
+the rules. The fields are as follows:
+
+- `version`: Ensures backward compatibility for future changes to the
+  structure. Currently expected to be "1".
+- `type`: Indicates the type of rule being applied.
+- `target`: Specifies the target of the rule, typically the fully
+  qualified name of the DWARF Debugging Information Entry (DIE).
+- `value`: Provides rule-specific data.
+
+The following helper macro, for example, can be used to specify rules
+in the source code::
+
+	#define __KABI_RULE(hint, target, value)                             \
+		static const char __PASTE(__gendwarfksyms_rule_,             \
+					  __COUNTER__)[] __used __aligned(1) \
+			__section(".discard.gendwarfksyms.kabi_rules") =     \
+				"1\0" #hint "\0" #target "\0" #value
+
+
+Currently, only the rules discussed in this section are supported, but
+the format is extensible enough to allow further rules to be added as
+the need arises.
+
+4.1.1. Managing definition visibility
+=====================================
+
+A declaration can change into a full definition when additional includes
+are pulled into the translation unit. This changes the versions of any
+symbol that references the type even if the ABI remains unchanged. As
+it may not be possible to drop includes without breaking the build, the
+`declonly` rule can be used to specify a type as declaration-only, even
+if the debugging information contains the full definition.
+
+The rule fields are expected to be as follows:
+
+- `type`: "declonly"
+- `target`: The fully qualified name of the target data structure
+  (as shown in **--dump-dies** output).
+- `value`: This field is ignored.
+
+Using the `__KABI_RULE` macro, this rule can be defined as::
+
+	#define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, )
+
+Example usage::
+
+	struct s {
+		/* definition */
+	};
+
+	KABI_DECLONLY(s);
+
+4.1.2. Adding enumerators
+=========================
+
+For enums, all enumerators and their values are included in calculating
+symbol versions, which becomes a problem if we later need to add more
+enumerators without changing symbol versions. The `enumerator_ignore`
+rule allows us to hide named enumerators from the input.
+
+The rule fields are expected to be as follows:
+
+- `type`: "enumerator_ignore"
+- `target`: The fully qualified name of the target enum
+  (as shown in **--dump-dies** output) and the name of the
+  enumerator field separated by a space.
+- `value`: This field is ignored.
+
+Using the `__KABI_RULE` macro, this rule can be defined as::
+
+	#define KABI_ENUMERATOR_IGNORE(fqn, field) \
+		__KABI_RULE(enumerator_ignore, fqn field, )
+
+Example usage::
+
+	enum e {
+		A, B, C, D,
+	};
+
+	KABI_ENUMERATOR_IGNORE(e, B);
+	KABI_ENUMERATOR_IGNORE(e, C);
+
+If the enum additionally includes an end marker and new values must
+be added in the middle, we may need to use the old value for the last
+enumerator when calculating versions. The `enumerator_value` rule allows
+us to override the value of an enumerator for version calculation:
+
+- `type`: "enumerator_value"
+- `target`: The fully qualified name of the target enum
+  (as shown in **--dump-dies** output) and the name of the
+  enumerator field separated by a space.
+- `value`: Integer value used for the field.
+
+Using the `__KABI_RULE` macro, this rule can be defined as::
+
+	#define KABI_ENUMERATOR_VALUE(fqn, field, value) \
+		__KABI_RULE(enumerator_value, fqn field, value)
+
+Example usage::
+
+	enum e {
+		A, B, C, LAST,
+	};
+
+	KABI_ENUMERATOR_IGNORE(e, C);
+	KABI_ENUMERATOR_VALUE(e, LAST, 2);
+
+4.2. Adding structure members
+=============================
+
+Perhaps the most common ABI compatible change is adding a member to a
+kernel data structure. When changes to a structure are anticipated,
+distribution maintainers can pre-emptively reserve space in the
+structure and take it into use later without breaking the ABI. If
+changes are needed to data structures without reserved space, existing
+alignment holes can potentially be used instead. While kABI rules could
+be added for these types of changes, using unions is typically a more
+natural method. This section describes gendwarfksyms support for using
+reserved space in data structures and hiding members that don't change
+the ABI when calculating symbol versions.
+
+4.2.1. Reserving space and replacing members
+============================================
+
+Space is typically reserved for later use by appending integer types, or
+arrays, to the end of the data structure, but any type can be used. Each
+reserved member needs a unique name, but as the actual purpose is usually
+not known at the time the space is reserved, for convenience, names that
+start with `__kabi_` are left out when calculating symbol versions::
+
+        struct s {
+                long a;
+                long __kabi_reserved_0; /* reserved for future use */
+        };
+
+The reserved space can be taken into use by wrapping the member in a
+union, which includes the original type and the replacement member::
+
+        struct s {
+                long a;
+                union {
+                        long __kabi_reserved_0; /* original type */
+                        struct b b; /* replaced field */
+                };
+        };
+
+If the `__kabi_` naming scheme was used when reserving space, the name
+of the first member of the union must start with `__kabi_reserved`. This
+ensures the original type is used when calculating versions, but the name
+is again left out. The rest of the union is ignored.
+
+If we're replacing a member that doesn't follow this naming convention,
+we also need to preserve the original name to avoid changing versions,
+which we can do by changing the first union member's name to start with
+`__kabi_renamed` followed by the original name.
+
+The examples include `KABI_(RESERVE|USE|REPLACE)*` macros that help
+simplify the process and also ensure the replacement member is correctly
+aligned and its size won't exceed the reserved space.
+
+4.2.2. Hiding members
+=====================
+
+Predicting which structures will require changes during the support
+timeframe isn't always possible, in which case one might have to resort
+to placing new members into existing alignment holes::
+
+        struct s {
+                int a;
+                /* a 4-byte alignment hole */
+                unsigned long b;
+        };
+
+
+While this won't change the size of the data structure, one needs to
+be able to hide the added members from symbol versioning. Similarly
+to reserved fields, this can be accomplished by wrapping the added
+member in a union where one of the fields has a name starting with
+`__kabi_ignored`::
+
+        struct s {
+                int a;
+                union {
+                        char __kabi_ignored_0;
+                        int n;
+                };
+                unsigned long b;
+        };
+
+With **--stable**, both versions produce the same symbol version.
diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst
index cee2f99f734b5..e82af05cd652c 100644
--- a/Documentation/kbuild/index.rst
+++ b/Documentation/kbuild/index.rst
@@ -21,6 +21,7 @@ Kernel Build System
     reproducible-builds
     gcc-plugins
     llvm
+    gendwarfksyms
 
 .. only::  subproject and html
 

From 54ac1ac8edeb74ff87fc880d1ee58785bdcbe323 Mon Sep 17 00:00:00 2001
From: Matthew Maurer <mmaurer@google.com>
Date: Fri, 3 Jan 2025 17:37:01 +0000
Subject: [PATCH 071/368] modules: Support extended MODVERSIONS info

Adds a new format for MODVERSIONS which stores each field in a separate
ELF section. This initially adds support for variable length names, but
could later be used to add additional fields to MODVERSIONS in a
backwards compatible way if needed. Any new fields will be ignored by
old user tooling, unlike the current format where user tooling cannot
tolerate adjustments to the format (for example making the name field
longer).

Since PPC munges its version records to strip leading dots, we reproduce
the munging for the new format. Other architectures do not appear to
have architecture-specific usage of this information.

Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Matthew Maurer <mmaurer@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 arch/powerpc/kernel/module_64.c | 24 ++++++++-
 kernel/module/internal.h        | 11 ++++
 kernel/module/main.c            | 92 ++++++++++++++++++++++++++++++---
 kernel/module/version.c         | 45 ++++++++++++++++
 4 files changed, 162 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 45dac7b46aa3c..34a5aec4908fb 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -369,6 +369,24 @@ static void dedotify_versions(struct modversion_info *vers,
 		}
 }
 
+/* Same as normal versions, remove a leading dot if present. */
+static void dedotify_ext_version_names(char *str_seq, unsigned long size)
+{
+	unsigned long out = 0;
+	unsigned long in;
+	char last = '\0';
+
+	for (in = 0; in < size; in++) {
+		/* Skip one leading dot; stop if it was the table's last byte */
+		if (last == '\0' && str_seq[in] == '.' && ++in == size)
+			break;
+		last = str_seq[in];
+		str_seq[out++] = last;
+	}
+	/* Zero the trailing portion of the names table for robustness */
+	memset(&str_seq[out], 0, size - out);
+}
+
 /*
  * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
  * seem to be defined (value set later).
@@ -438,10 +456,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
 			me->arch.toc_section = i;
 			if (sechdrs[i].sh_addralign < 8)
 				sechdrs[i].sh_addralign = 8;
-		}
-		else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+		} else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
 			dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
 					  sechdrs[i].sh_size);
+		else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0)
+			dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset,
+						   sechdrs[i].sh_size);
 
 		if (sechdrs[i].sh_type == SHT_SYMTAB)
 			dedotify((void *)hdr + sechdrs[i].sh_offset,
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index f10dc3ea7ff88..887838589020d 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -86,6 +86,8 @@ struct load_info {
 		unsigned int vers;
 		unsigned int info;
 		unsigned int pcpu;
+		unsigned int vers_ext_crc;
+		unsigned int vers_ext_name;
 	} index;
 };
 
@@ -389,6 +391,15 @@ void module_layout(struct module *mod, struct modversion_info *ver, struct kerne
 		   struct kernel_symbol *ks, struct tracepoint * const *tp);
 int check_modstruct_version(const struct load_info *info, struct module *mod);
 int same_magic(const char *amagic, const char *bmagic, bool has_crcs);
+struct modversion_info_ext {
+	size_t remaining;
+	const u32 *crc;
+	const char *name;
+};
+void modversion_ext_start(const struct load_info *info, struct modversion_info_ext *ver);
+void modversion_ext_advance(struct modversion_info_ext *ver);
+#define for_each_modversion_info_ext(ver, info) \
+	for (modversion_ext_start(info, &ver); ver.remaining > 0; modversion_ext_advance(&ver))
 #else /* !CONFIG_MODVERSIONS */
 static inline int check_version(const struct load_info *info,
 				const char *symname,
diff --git a/kernel/module/main.c b/kernel/module/main.c
index e58bff88b8d63..0830892070fea 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2073,6 +2073,82 @@ static int elf_validity_cache_index_str(struct load_info *info)
 	return 0;
 }
 
+/**
+ * elf_validity_cache_index_versions() - Validate and cache version indices
+ * @info:  Load info to cache version indices in.
+ *         Must have &load_info->sechdrs and &load_info->secstrings populated.
+ * @flags: Load flags, relevant to suppress version loading, see
+ *         uapi/linux/module.h
+ *
+ * If we're ignoring modversions based on @flags, zero all version indices
+ * and return validity. Otherwise check:
+ *
+ * * If "__version_ext_crcs" is present, "__version_ext_names" is present
+ * * There is a name present for every crc
+ *
+ * Then populate:
+ *
+ * * &load_info->index.vers
+ * * &load_info->index.vers_ext_crc
+ * * &load_info->index.vers_ext_name
+ *
+ * if present.
+ *
+ * Return: %0 if valid, %-ENOEXEC on failure.
+ */
+static int elf_validity_cache_index_versions(struct load_info *info, int flags)
+{
+	unsigned int vers_ext_crc;
+	unsigned int vers_ext_name;
+	size_t crc_count;
+	size_t remaining_len;
+	size_t name_size;
+	char *name;
+
+	/* If modversions were suppressed, pretend we didn't find any */
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS) {
+		info->index.vers = 0;
+		info->index.vers_ext_crc = 0;
+		info->index.vers_ext_name = 0;
+		return 0;
+	}
+
+	vers_ext_crc = find_sec(info, "__version_ext_crcs");
+	vers_ext_name = find_sec(info, "__version_ext_names");
+
+	/* If we have one field, we must have the other */
+	if (!!vers_ext_crc != !!vers_ext_name) {
+		pr_err("extended version crc+name presence does not match\n");
+		return -ENOEXEC;
+	}
+
+	/*
+	 * If we have extended version information, we should have the same
+	 * number of entries in every section.
+	 */
+	if (vers_ext_crc) {
+		crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(u32);
+		name = (void *)info->hdr +
+			info->sechdrs[vers_ext_name].sh_offset;
+		remaining_len = info->sechdrs[vers_ext_name].sh_size;
+
+		while (crc_count--) {
+			name_size = strnlen(name, remaining_len) + 1;
+			if (name_size > remaining_len) {
+				pr_err("more extended version crcs than names\n");
+				return -ENOEXEC;
+			}
+			remaining_len -= name_size;
+			name += name_size;
+		}
+	}
+
+	info->index.vers = find_sec(info, "__versions");
+	info->index.vers_ext_crc = vers_ext_crc;
+	info->index.vers_ext_name = vers_ext_name;
+	return 0;
+}
+
 /**
  * elf_validity_cache_index() - Resolve, validate, cache section indices
  * @info:  Load info to read from and update.
@@ -2087,9 +2163,7 @@ static int elf_validity_cache_index_str(struct load_info *info)
  * * elf_validity_cache_index_mod()
  * * elf_validity_cache_index_sym()
  * * elf_validity_cache_index_str()
- *
- * If versioning is not suppressed via flags, load the version index from
- * a section called "__versions" with no validation.
+ * * elf_validity_cache_index_versions()
  *
  * If CONFIG_SMP is enabled, load the percpu section by name with no
  * validation.
@@ -2112,11 +2186,9 @@ static int elf_validity_cache_index(struct load_info *info, int flags)
 	err = elf_validity_cache_index_str(info);
 	if (err < 0)
 		return err;
-
-	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
-		info->index.vers = 0; /* Pretend no __versions section! */
-	else
-		info->index.vers = find_sec(info, "__versions");
+	err = elf_validity_cache_index_versions(info, flags);
+	if (err < 0)
+		return err;
 
 	info->index.pcpu = find_pcpusec(info);
 
@@ -2327,6 +2399,10 @@ static int rewrite_section_headers(struct load_info *info, int flags)
 
 	/* Track but don't keep modinfo and version sections. */
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+	info->sechdrs[info->index.vers_ext_crc].sh_flags &=
+		~(unsigned long)SHF_ALLOC;
+	info->sechdrs[info->index.vers_ext_name].sh_flags &=
+		~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 
 	return 0;
diff --git a/kernel/module/version.c b/kernel/module/version.c
index 4e5731d403af2..3718a88683219 100644
--- a/kernel/module/version.c
+++ b/kernel/module/version.c
@@ -19,11 +19,28 @@ int check_version(const struct load_info *info,
 	unsigned int versindex = info->index.vers;
 	unsigned int i, num_versions;
 	struct modversion_info *versions;
+	struct modversion_info_ext version_ext;
 
 	/* Exporting module didn't supply crcs?  OK, we're already tainted. */
 	if (!crc)
 		return 1;
 
+	/* If we have extended version info, rely on it */
+	if (info->index.vers_ext_crc) {
+		for_each_modversion_info_ext(version_ext, info) {
+			if (strcmp(version_ext.name, symname) != 0)
+				continue;
+			if (*version_ext.crc == *crc)
+				return 1;
+			pr_debug("Found checksum %X vs module %X\n",
+				 *crc, *version_ext.crc);
+			goto bad_version;
+		}
+		pr_warn_once("%s: no extended symbol version for %s\n",
+			     info->name, symname);
+		return 1;
+	}
+
 	/* No versions at all?  modprobe --force does this. */
 	if (versindex == 0)
 		return try_to_force_load(mod, symname) == 0;
@@ -87,6 +104,34 @@ int same_magic(const char *amagic, const char *bmagic,
 	return strcmp(amagic, bmagic) == 0;
 }
 
+void modversion_ext_start(const struct load_info *info,
+			  struct modversion_info_ext *start)
+{
+	unsigned int crc_idx = info->index.vers_ext_crc;
+	unsigned int name_idx = info->index.vers_ext_name;
+	Elf_Shdr *sechdrs = info->sechdrs;
+
+	/*
+	 * Both of these fields are needed for this to be useful
+	 * Any future fields should be initialized to NULL if absent.
+	 */
+	if (crc_idx == 0 || name_idx == 0) {
+		start->remaining = 0;
+		return;
+	}
+
+	start->crc = (const u32 *)sechdrs[crc_idx].sh_addr;
+	start->name = (const char *)sechdrs[name_idx].sh_addr;
+	start->remaining = sechdrs[crc_idx].sh_size / sizeof(*start->crc);
+}
+
+void modversion_ext_advance(struct modversion_info_ext *vers)
+{
+	vers->remaining--;
+	vers->crc++;
+	vers->name += strlen(vers->name) + 1;
+}
+
 /*
  * Generate the signature for all relevant module structures here.
  * If these change, we don't want to try to parse the module.

From fc7d5e3210ae083a29ce224ffce18eaf3d1c645a Mon Sep 17 00:00:00 2001
From: Matthew Maurer <mmaurer@google.com>
Date: Fri, 3 Jan 2025 17:37:02 +0000
Subject: [PATCH 072/368] modpost: Produce extended MODVERSIONS information

Generate both the existing modversions format and the new extended one
when running modpost. Presence of this metadata in the final .ko is
guarded by CONFIG_EXTENDED_MODVERSIONS.

We no longer generate an error on long symbols in modpost if
CONFIG_EXTENDED_MODVERSIONS is set, as they can now be appropriately
encoded in the extended section. These symbols will be skipped in the
previous encoding. An error will still be generated if
CONFIG_EXTENDED_MODVERSIONS is not set.

Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Matthew Maurer <mmaurer@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/module/Kconfig    | 10 +++++++
 scripts/Makefile.modpost |  1 +
 scripts/mod/modpost.c    | 62 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index d443fc504ffca..9568b629a03ce 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -207,6 +207,16 @@ config ASM_MODVERSIONS
 	  assembly. This can be enabled only when the target architecture
 	  supports it.
 
+config EXTENDED_MODVERSIONS
+	bool "Extended Module Versioning Support"
+	depends on MODVERSIONS
+	help
+	  This enables extended MODVERSIONS support, allowing long symbol
+	  names to be versioned.
+
+	  The most likely reason you would enable this is to enable Rust
+	  support. If unsure, say N.
+
 config MODULE_SRCVERSION_ALL
 	bool "Source checksum for all modules"
 	help
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index ab0e94ea62496..40426fc635098 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -43,6 +43,7 @@ MODPOST = $(objtree)/scripts/mod/modpost
 modpost-args =										\
 	$(if $(CONFIG_MODULES),-M)							\
 	$(if $(CONFIG_MODVERSIONS),-m)							\
+	$(if $(CONFIG_EXTENDED_MODVERSIONS),-x)						\
 	$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)					\
 	$(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E)					\
 	$(if $(KBUILD_MODPOST_WARN),-w)							\
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index dc907014108bb..38ff3dd4a9a16 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -33,6 +33,8 @@ static bool module_enabled;
 static bool modversions;
 /* Is CONFIG_MODULE_SRCVERSION_ALL set? */
 static bool all_versions;
+/* Is CONFIG_EXTENDED_MODVERSIONS set? */
+static bool extended_modversions;
 /* If we are modposting external module set to 1 */
 static bool external_module;
 /* Only warn about unresolved symbols */
@@ -1805,6 +1807,49 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod)
 	}
 }
 
+/**
+ * Record CRCs for unresolved symbols, supporting long names
+ */
+static void add_extended_versions(struct buffer *b, struct module *mod)
+{
+	struct symbol *s;
+
+	if (!extended_modversions)
+		return;
+
+	buf_printf(b, "\n");
+	buf_printf(b, "static const u32 ____version_ext_crcs[]\n");
+	buf_printf(b, "__used __section(\"__version_ext_crcs\") = {\n");
+	list_for_each_entry(s, &mod->unresolved_symbols, list) {
+		if (!s->module)
+			continue;
+		if (!s->crc_valid) {
+			warn("\"%s\" [%s.ko] has no CRC!\n",
+				s->name, mod->name);
+			continue;
+		}
+		buf_printf(b, "\t0x%08x,\n", s->crc);
+	}
+	buf_printf(b, "};\n");
+
+	buf_printf(b, "static const char ____version_ext_names[]\n");
+	buf_printf(b, "__used __section(\"__version_ext_names\") =\n");
+	list_for_each_entry(s, &mod->unresolved_symbols, list) {
+		if (!s->module)
+			continue;
+		if (!s->crc_valid)
+			/*
+			 * We already warned on this when producing the crc
+			 * table.
+			 * We need to skip its name too, as the indexes in
+			 * both tables need to align.
+			 */
+			continue;
+		buf_printf(b, "\t\"%s\\0\"\n", s->name);
+	}
+	buf_printf(b, ";\n");
+}
+
 /**
  * Record CRCs for unresolved symbols
  **/
@@ -1828,9 +1873,14 @@ static void add_versions(struct buffer *b, struct module *mod)
 			continue;
 		}
 		if (strlen(s->name) >= MODULE_NAME_LEN) {
-			error("too long symbol \"%s\" [%s.ko]\n",
-			      s->name, mod->name);
-			break;
+			if (extended_modversions) {
+				/* this symbol will only be in the extended info */
+				continue;
+			} else {
+				error("too long symbol \"%s\" [%s.ko]\n",
+				      s->name, mod->name);
+				break;
+			}
 		}
 		buf_printf(b, "\t{ 0x%08x, \"%s\" },\n",
 			   s->crc, s->name);
@@ -1961,6 +2011,7 @@ static void write_mod_c_file(struct module *mod)
 	add_header(&buf, mod);
 	add_exported_symbols(&buf, mod);
 	add_versions(&buf, mod);
+	add_extended_versions(&buf, mod);
 	add_depends(&buf, mod);
 
 	buf_printf(&buf, "\n");
@@ -2126,7 +2177,7 @@ int main(int argc, char **argv)
 	LIST_HEAD(dump_lists);
 	struct dump_list *dl, *dl2;
 
-	while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) {
+	while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:x")) != -1) {
 		switch (opt) {
 		case 'e':
 			external_module = true;
@@ -2175,6 +2226,9 @@ int main(int argc, char **argv)
 		case 'd':
 			missing_namespace_deps = optarg;
 			break;
+		case 'x':
+			extended_modversions = true;
+			break;
 		default:
 			exit(1);
 		}

From e8639b7ef0f871753b4262ec0eacd3da29eebcee Mon Sep 17 00:00:00 2001
From: Matthew Maurer <mmaurer@google.com>
Date: Fri, 3 Jan 2025 17:37:03 +0000
Subject: [PATCH 073/368] modpost: Allow extended modversions without basic
 MODVERSIONS

If you know that your kernel modules will only ever be loaded by a newer
kernel, you can disable BASIC_MODVERSIONS to save space. This also
allows easy creation of test modules to see how tooling will respond to
modules that only have the new format.

Signed-off-by: Matthew Maurer <mmaurer@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/module/Kconfig    | 15 +++++++++++++++
 scripts/Makefile.modpost |  1 +
 scripts/mod/modpost.c    |  9 +++++++--
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 9568b629a03ce..4538f3af63e1c 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -217,6 +217,21 @@ config EXTENDED_MODVERSIONS
 	  The most likely reason you would enable this is to enable Rust
 	  support. If unsure, say N.
 
+config BASIC_MODVERSIONS
+	bool "Basic Module Versioning Support"
+	depends on MODVERSIONS
+	default y
+	help
+	  This enables basic MODVERSIONS support, allowing older tools or
+	  kernels to potentially load modules.
+
+	  Disabling this may cause older `modprobe` or `kmod` to be unable
+	  to read MODVERSIONS information from built modules. With this
+	  disabled, older kernels may treat this module as unversioned.
+
+	  This is enabled by default when MODVERSIONS are enabled.
+	  If unsure, say Y.
+
 config MODULE_SRCVERSION_ALL
 	bool "Source checksum for all modules"
 	help
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 40426fc635098..d7d45067d08b9 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -43,6 +43,7 @@ MODPOST = $(objtree)/scripts/mod/modpost
 modpost-args =										\
 	$(if $(CONFIG_MODULES),-M)							\
 	$(if $(CONFIG_MODVERSIONS),-m)							\
+	$(if $(CONFIG_BASIC_MODVERSIONS),-b)						\
 	$(if $(CONFIG_EXTENDED_MODVERSIONS),-x)						\
 	$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)					\
 	$(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E)					\
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 38ff3dd4a9a16..e18ae7dc8140a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -33,6 +33,8 @@ static bool module_enabled;
 static bool modversions;
 /* Is CONFIG_MODULE_SRCVERSION_ALL set? */
 static bool all_versions;
+/* Is CONFIG_BASIC_MODVERSIONS set? */
+static bool basic_modversions;
 /* Is CONFIG_EXTENDED_MODVERSIONS set? */
 static bool extended_modversions;
 /* If we are modposting external module set to 1 */
@@ -1857,7 +1859,7 @@ static void add_versions(struct buffer *b, struct module *mod)
 {
 	struct symbol *s;
 
-	if (!modversions)
+	if (!basic_modversions)
 		return;
 
 	buf_printf(b, "\n");
@@ -2177,7 +2179,7 @@ int main(int argc, char **argv)
 	LIST_HEAD(dump_lists);
 	struct dump_list *dl, *dl2;
 
-	while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:x")) != -1) {
+	while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:xb")) != -1) {
 		switch (opt) {
 		case 'e':
 			external_module = true;
@@ -2226,6 +2228,9 @@ int main(int argc, char **argv)
 		case 'd':
 			missing_namespace_deps = optarg;
 			break;
+		case 'b':
+			basic_modversions = true;
+			break;
 		case 'x':
 			extended_modversions = true;
 			break;

From 272f8a6d625a0cf7fba9c5af5202edc84dee326c Mon Sep 17 00:00:00 2001
From: Matthew Maurer <mmaurer@google.com>
Date: Fri, 3 Jan 2025 17:37:04 +0000
Subject: [PATCH 074/368] Documentation/kbuild: Document storage of symbol
 information

Document where exported and imported symbols are kept, format options,
and limitations.

Signed-off-by: Matthew Maurer <mmaurer@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/kbuild/modules.rst | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index 101de236cd0c9..a42f00d8cb90f 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -423,6 +423,26 @@ Symbols From the Kernel (vmlinux + modules)
 	1) It lists all exported symbols from vmlinux and all modules.
 	2) It lists the CRC if CONFIG_MODVERSIONS is enabled.
 
+Version Information Formats
+---------------------------
+
+	Exported symbols have information stored in __ksymtab or __ksymtab_gpl
+	sections. Symbol names and namespaces are stored in __ksymtab_strings,
+	using a format similar to the string table used for ELF. If
+	CONFIG_MODVERSIONS is enabled, the CRCs corresponding to exported
+	symbols will be added to the __kcrctab or __kcrctab_gpl.
+
+	If CONFIG_BASIC_MODVERSIONS is enabled (default with
+	CONFIG_MODVERSIONS), imported symbols will have their symbol name and
+	CRC stored in the __versions section of the importing module. This
+	mode only supports symbols of length up to 64 bytes.
+
+	If CONFIG_EXTENDED_MODVERSIONS is enabled (required to enable both
+	CONFIG_MODVERSIONS and CONFIG_RUST at the same time), imported symbols
+	will have their symbol name recorded in the __version_ext_names
+	section as a series of concatenated, null-terminated strings. CRCs for
+	these symbols will be recorded in the __version_ext_crcs section.
+
 Symbols and External Modules
 ----------------------------
 

From 60a6002432448bb3f291d80768ae98d62efc9c77 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 11 Jan 2025 01:37:44 -0500
Subject: [PATCH 075/368] hostfs: fix string handling in __dentry_name()

strcpy() should not be used with destination potentially overlapping
the source; what's more, strscpy() in there is pointless - we already
know the amount we want to copy; might as well use memcpy().

Fixes: c278e81b8a02 ("hostfs: Remove open coded strcpy()")
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7e51d2cec64b4..bd6503b731426 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -95,32 +95,17 @@ __uml_setup("hostfs=", hostfs_args,
 static char *__dentry_name(struct dentry *dentry, char *name)
 {
 	char *p = dentry_path_raw(dentry, name, PATH_MAX);
-	char *root;
-	size_t len;
-	struct hostfs_fs_info *fsi;
-
-	fsi = dentry->d_sb->s_fs_info;
-	root = fsi->host_root_path;
-	len = strlen(root);
-	if (IS_ERR(p)) {
-		__putname(name);
-		return NULL;
-	}
-
-	/*
-	 * This function relies on the fact that dentry_path_raw() will place
-	 * the path name at the end of the provided buffer.
-	 */
-	BUG_ON(p + strlen(p) + 1 != name + PATH_MAX);
+	struct hostfs_fs_info *fsi = dentry->d_sb->s_fs_info;
+	char *root = fsi->host_root_path;
+	size_t len = strlen(root);
 
-	strscpy(name, root, PATH_MAX);
-	if (len > p - name) {
+	if (IS_ERR(p) || len > p - name) {
 		__putname(name);
 		return NULL;
 	}
 
-	if (p > name + len)
-		strcpy(name + len, p);
+	memcpy(name, root, len);
+	memmove(name + len, p, name + PATH_MAX - p);
 
 	return name;
 }

From 09c4a610153286cef54d4f0c85398f4e32fc227e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Wed, 11 Dec 2024 12:32:34 +0300
Subject: [PATCH 076/368] rtc: tps6594: Fix integer overflow on 32bit systems

The problem is this multiply in tps6594_rtc_set_offset()

	tmp = offset * TICKS_PER_HOUR;

The "tmp" variable is an s64 but "offset" is a long in the
(-277774)-277774 range.  On 32bit systems a long can hold numbers up to
approximately two billion.  The number of TICKS_PER_HOUR is really large,
(32768 * 3600) or roughly a hundred million.  When you start multiplying
by a hundred million it doesn't take long to overflow the two billion
mark.

Probably the safest way to fix this is to change the type of
TICKS_PER_HOUR to long long because it's such a large number.

Fixes: 9f67c1e63976 ("rtc: tps6594: Add driver for TPS6594 RTC")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://lore.kernel.org/r/1074175e-5ecb-4e3d-b721-347d794caa90@stanley.mountain
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-tps6594.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-tps6594.c b/drivers/rtc/rtc-tps6594.c
index e696676341378..7c6246e3f0292 100644
--- a/drivers/rtc/rtc-tps6594.c
+++ b/drivers/rtc/rtc-tps6594.c
@@ -37,7 +37,7 @@
 #define MAX_OFFSET (277774)
 
 // Number of ticks per hour
-#define TICKS_PER_HOUR (32768 * 3600)
+#define TICKS_PER_HOUR (32768 * 3600LL)
 
 // Multiplier for ppb conversions
 #define PPB_MULT NANO

From 3ab8c5ed4f84fa20cd16794fe8dc31f633fbc70c Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 18 Dec 2024 20:34:58 +0100
Subject: [PATCH 077/368] rtc: pcf85063: fix potential OOB write in PCF85063
 NVMEM read

The nvmem interface supports variable buffer sizes, while the regmap
interface operates with fixed-size storage. If an nvmem client uses a
buffer size less than 4 bytes, regmap_read will write out of bounds
as it expects the buffer to point at an unsigned int.

Fix this by using an intermediary unsigned int to hold the value.

Fixes: fadfd092ee91 ("rtc: pcf85063: add nvram support")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Link: https://lore.kernel.org/r/20241218-rtc-pcf85063-stack-corruption-v1-1-12fd0ee0f046@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf85063.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index fdbc07f14036a..905986c616559 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -322,7 +322,16 @@ static const struct rtc_class_ops pcf85063_rtc_ops = {
 static int pcf85063_nvmem_read(void *priv, unsigned int offset,
 			       void *val, size_t bytes)
 {
-	return regmap_read(priv, PCF85063_REG_RAM, val);
+	unsigned int tmp;
+	int ret;
+
+	ret = regmap_read(priv, PCF85063_REG_RAM, &tmp);
+	if (ret < 0)
+		return ret;
+
+	*(u8 *)val = tmp;
+
+	return 0;
 }
 
 static int pcf85063_nvmem_write(void *priv, unsigned int offset,

From 09471d8f5b390883eaf21b917c4bf3ced1b8a1df Mon Sep 17 00:00:00 2001
From: Ming Wang <wangming01@loongson.cn>
Date: Thu, 5 Dec 2024 19:43:07 +0800
Subject: [PATCH 078/368] rtc: loongson: clear TOY_MATCH0_REG in
 loongson_rtc_isr()

The TOY_MATCH0_REG should be cleared to 0 in the RTC interrupt handler,
otherwise the interrupt cannot be cleared, which will cause the
loongson_rtc_isr() to be triggered multiple times.

The previous code cleared TOY_MATCH0_REG in the loongson_rtc_handler(),
which is an ACPI interrupt. This did not prevent loongson_rtc_isr()
from being triggered multiple times.

This commit moves the clearing of TOY_MATCH0_REG to the
loongson_rtc_isr() to ensure that the interrupt is properly cleared.

Fixes: 1b733a9ebc3d ("rtc: Add rtc driver for the Loongson family chips")
Signed-off-by: Ming Wang <wangming01@loongson.cn>
Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
Reviewed-by: Keguang Zhang <keguang.zhang@gmail.com> # on LS1B
Tested-by: Keguang Zhang <keguang.zhang@gmail.com>
Link: https://lore.kernel.org/r/20241205114307.1891418-1-wangming01@loongson.cn
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-loongson.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c
index 6f5f4430c2ae3..97e5625c064ce 100644
--- a/drivers/rtc/rtc-loongson.c
+++ b/drivers/rtc/rtc-loongson.c
@@ -114,6 +114,13 @@ static irqreturn_t loongson_rtc_isr(int irq, void *id)
 	struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id;
 
 	rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF);
+
+	/*
+	 * The TOY_MATCH0_REG should be cleared to 0 here,
+	 * otherwise the interrupt cannot be cleared.
+	 */
+	regmap_write(priv->regmap, TOY_MATCH0_REG, 0);
+
 	return IRQ_HANDLED;
 }
 
@@ -131,11 +138,7 @@ static u32 loongson_rtc_handler(void *id)
 	writel(RTC_STS, priv->pm_base + PM1_STS_REG);
 	spin_unlock(&priv->lock);
 
-	/*
-	 * The TOY_MATCH0_REG should be cleared 0 here,
-	 * otherwise the interrupt cannot be cleared.
-	 */
-	return regmap_write(priv->regmap, TOY_MATCH0_REG, 0);
+	return ACPI_INTERRUPT_HANDLED;
 }
 
 static int loongson_rtc_set_enabled(struct device *dev)

From 2a388ff22d2cbfc5cbd628ef085bdcd3b7dc64f5 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@amd.com>
Date: Wed, 27 Nov 2024 17:01:22 +0100
Subject: [PATCH 079/368] rtc: zynqmp: Fix optional clock name property

Clock description in DT binding introduced by commit f69060c14431
("dt-bindings: rtc: zynqmp: Add clock information") is talking about "rtc"
clock name but driver is checking "rtc_clk" name instead.
Because the clock is an optional property, it was likely never handled
properly by the driver.

Fixes: 07dcc6f9c762 ("rtc: zynqmp: Add calibration set and get support")
Signed-off-by: Michal Simek <michal.simek@amd.com>
Cc: stable@kernel.org
Reviewed-by: Peter Korsgaard <peter@korsgaard.com>
Link: https://lore.kernel.org/r/cd5f0c9d01ec1f5a240e37a7e0d85b8dacb3a869.1732723280.git.michal.simek@amd.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-zynqmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c
index 625f708a7cafc..f39102b66eac2 100644
--- a/drivers/rtc/rtc-zynqmp.c
+++ b/drivers/rtc/rtc-zynqmp.c
@@ -318,8 +318,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	/* Getting the rtc_clk info */
-	xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc_clk");
+	/* Getting the rtc info */
+	xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc");
 	if (IS_ERR(xrtcdev->rtc_clk)) {
 		if (PTR_ERR(xrtcdev->rtc_clk) != -EPROBE_DEFER)
 			dev_warn(&pdev->dev, "Device clock not found.\n");

From 3f76ba88c3fda18dd71296aa87e775e56c29a3d5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Sat, 11 Jan 2025 19:54:05 +0100
Subject: [PATCH 080/368] rtc: stm32: Use syscon_regmap_lookup_by_phandle_args

Use syscon_regmap_lookup_by_phandle_args() which is a wrapper over
syscon_regmap_lookup_by_phandle() combined with getting the syscon
argument.  Besides simplifying the code, this annotates within one line
that the given phandle has arguments, so grepping for such code is easier.

There is also no real benefit in printing errors on missing syscon
argument, because this is done just too late: runtime check on
static/build-time data.  Dtschema and Devicetree bindings offer the
static/build-time check for this already.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20250111185405.183824-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-stm32.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c
index 9f1a019ec8afa..a0564d4435690 100644
--- a/drivers/rtc/rtc-stm32.c
+++ b/drivers/rtc/rtc-stm32.c
@@ -1074,26 +1074,18 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 	regs = &rtc->data->regs;
 
 	if (rtc->data->need_dbp) {
-		rtc->dbp = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-							   "st,syscfg");
+		unsigned int args[2];
+
+		rtc->dbp = syscon_regmap_lookup_by_phandle_args(pdev->dev.of_node,
+								"st,syscfg",
+								2, args);
 		if (IS_ERR(rtc->dbp)) {
 			dev_err(&pdev->dev, "no st,syscfg\n");
 			return PTR_ERR(rtc->dbp);
 		}
 
-		ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg",
-						 1, &rtc->dbp_reg);
-		if (ret) {
-			dev_err(&pdev->dev, "can't read DBP register offset\n");
-			return ret;
-		}
-
-		ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg",
-						 2, &rtc->dbp_mask);
-		if (ret) {
-			dev_err(&pdev->dev, "can't read DBP register mask\n");
-			return ret;
-		}
+		rtc->dbp_reg = args[0];
+		rtc->dbp_mask = args[1];
 	}
 
 	if (!rtc->data->has_pclk) {

From 6758bd0692e25b7c58ff0b3e47fbefe75be177af Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@denx.de>
Date: Sun, 12 Jan 2025 10:40:27 -0300
Subject: [PATCH 081/368] dt-bindings: rtc: mxc: Document fsl,imx31-rtc

imx31.dtsi uses the following RTC compatible:

compatible = "fsl,imx31-rtc", "fsl,imx21-rtc";

Document 'fsl,imx31-rtc' to fix the following dt-schema warning:

'fsl,imx31-rtc' is not one of ['fsl,imx1-rtc', 'fsl,imx21-rtc']

Signed-off-by: Fabio Estevam <festevam@denx.de>
Link: https://lore.kernel.org/r/20250112134027.1013213-1-festevam@gmail.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 Documentation/devicetree/bindings/rtc/rtc-mxc.yaml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml
index a14b52178c4b0..2599b847f406c 100644
--- a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml
+++ b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml
@@ -14,9 +14,13 @@ maintainers:
 
 properties:
   compatible:
-    enum:
-      - fsl,imx1-rtc
-      - fsl,imx21-rtc
+    oneOf:
+      - const: fsl,imx1-rtc
+      - const: fsl,imx21-rtc
+      - items:
+          - enum:
+              - fsl,imx31-rtc
+          - const: fsl,imx21-rtc
 
   reg:
     maxItems: 1

From 3e9807aa6481304c5e992c4451976ab02c58f5ec Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Mon, 13 Jan 2025 23:32:17 +0800
Subject: [PATCH 082/368] um: Include missing headers in asm/pgtable.h

Formerly, asm/pgtable.h relied on the implicit inclusion of asm/page.h
and linux/mm_types.h via asm/fixmap.h. With the removal of asm/fixmap.h,
these headers need to be included explicitly in asm/pgtable.h now.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501131814.E6CxjulL-lkp@intel.com/
Fixes: 5bfc4a3a0af3 ("um: Remove obsolete fixmap support")
Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20250113153218.3331321-2-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/include/asm/pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 9be6daca95be4..5601ca98e8a6a 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -8,6 +8,9 @@
 #ifndef __UM_PGTABLE_H
 #define __UM_PGTABLE_H
 
+#include <asm/page.h>
+#include <linux/mm_types.h>
+
 #define _PAGE_PRESENT	0x001
 #define _PAGE_NEEDSYNC	0x002
 #define _PAGE_RW	0x020

From 2d2b61ae38bd91217ea7cc5bc700a2b9e75b3937 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Mon, 13 Jan 2025 23:32:18 +0800
Subject: [PATCH 083/368] um: Remove unused asm/archparam.h header

This header is no longer used after the removal of fixmap support
in commit 5bfc4a3a0af3 ("um: Remove obsolete fixmap support").

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20250113153218.3331321-3-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/x86/um/asm/archparam.h | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 arch/x86/um/asm/archparam.h

diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h
deleted file mode 100644
index c17cf68dda0f1..0000000000000
--- a/arch/x86/um/asm/archparam.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
- */
-
-#ifndef __UM_ARCHPARAM_H
-#define __UM_ARCHPARAM_H
-
-#ifdef CONFIG_X86_32
-
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-#endif
-
-#endif

From 58589c6a6e9ed8781eb8876ece5f4ef4c8dc3eed Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 15 Dec 2024 02:23:56 +0000
Subject: [PATCH 084/368] rtc: Remove hpet_rtc_dropped_irq()

hpet_rtc_dropped_irq() has been unused since
commit f52ef24be21a ("rtc/alpha: remove legacy rtc driver")

Remove it in rtc, and x86 hpet code.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20241215022356.181625-1-linux@treblig.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 arch/x86/include/asm/hpet.h | 1 -
 arch/x86/kernel/hpet.c      | 6 ------
 drivers/rtc/rtc-cmos.c      | 5 -----
 3 files changed, 12 deletions(-)

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index ab9f3dd87c805..ab0c78855ecb2 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -84,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
 extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
 			       unsigned char sec);
 extern int hpet_set_periodic_freq(unsigned long freq);
-extern int hpet_rtc_dropped_irq(void);
 extern int hpet_rtc_timer_init(void);
 extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
 extern int hpet_register_irq_handler(rtc_irq_handler handler);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c96ae8fee95e4..7e21018a0e043 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1392,12 +1392,6 @@ int hpet_set_periodic_freq(unsigned long freq)
 }
 EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
 
-int hpet_rtc_dropped_irq(void)
-{
-	return is_hpet_enabled();
-}
-EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
-
 static void hpet_rtc_timer_reinit(void)
 {
 	unsigned int delta;
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 4bd3a3a04d444..8172869bd3d79 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -151,11 +151,6 @@ static inline int hpet_set_periodic_freq(unsigned long freq)
 	return 0;
 }
 
-static inline int hpet_rtc_dropped_irq(void)
-{
-	return 0;
-}
-
 static inline int hpet_rtc_timer_init(void)
 {
 	return 0;

From 05c14d8fd71b9c19391d0b4d65b1c1764e1c440f Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 10 Dec 2024 09:49:45 -0500
Subject: [PATCH 085/368] tools/power turbostat: add Busy% to "show idle"

Suggested-by: Artem Bityutskiy <artem.bityutskiy@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7accc4a733667..7a10e51a13496 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -270,7 +270,7 @@ struct msr_counter bic[] = {
 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
 #define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J)

From 22a835282b6240f38097f479ae2194bbeb0181e4 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 17 Dec 2024 18:00:31 -0500
Subject: [PATCH 086/368] tools/power turbostat: Add an NMI column

Add an NMI column, a proper sub-set of the IRQ column.

It would be preferable if the kernel exported
/sys/kernel/irq/NMI/per_cpu_count.

But since we are already forced to parse /proc/interrupts,
noticing which row is the NMI is simple enough.

Suggested-by: Artem Bityutskiy <artem.bityutskiy@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 54 ++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 6 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7a10e51a13496..2620ed000ad07 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -202,6 +202,7 @@ struct msr_counter bic[] = {
 	{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
 	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
 	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
+	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -266,12 +267,13 @@ struct msr_counter bic[] = {
 #define	BIC_Diec6		(1ULL << 58)
 #define	BIC_SysWatt		(1ULL << 59)
 #define	BIC_Sys_J		(1ULL << 60)
+#define	BIC_NMI			(1ULL << 61)
 
 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
 #define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+#define BIC_OTHER ( BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
 #define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J)
 
@@ -1628,6 +1630,7 @@ struct thread_data {
 	unsigned long long c1;
 	unsigned long long instr_count;
 	unsigned long long irq_count;
+	unsigned long long nmi_count;
 	unsigned int smi_count;
 	unsigned int cpu_id;
 	unsigned int apic_id;
@@ -1934,6 +1937,7 @@ struct timeval tv_even, tv_odd, tv_delta;
 
 int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
 int *irqs_per_cpu;		/* indexed by cpu_num */
+int *nmi_per_cpu;		/* indexed by cpu_num */
 
 void setup_all_buffers(bool startup);
 
@@ -2319,6 +2323,12 @@ void print_header(char *delim)
 		else
 			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
 	}
+	if (DO_BIC(BIC_NMI)) {
+		if (sums_need_wide_columns)
+			outp += sprintf(outp, "%s     NMI", (printed++ ? delim : ""));
+		else
+			outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
+	}
 
 	if (DO_BIC(BIC_SMI))
 		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
@@ -2605,6 +2615,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 		if (DO_BIC(BIC_IRQ))
 			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
+		if (DO_BIC(BIC_NMI))
+			outp += sprintf(outp, "NMI: %lld\n", t->nmi_count);
 		if (DO_BIC(BIC_SMI))
 			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
 
@@ -2824,6 +2836,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
 	}
 
+	/* NMI */
+	if (DO_BIC(BIC_NMI)) {
+		if (sums_need_wide_columns)
+			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
+		else
+			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
+	}
+
 	/* SMI */
 	if (DO_BIC(BIC_SMI))
 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
@@ -3439,6 +3459,9 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
 	if (DO_BIC(BIC_IRQ))
 		old->irq_count = new->irq_count - old->irq_count;
 
+	if (DO_BIC(BIC_NMI))
+		old->nmi_count = new->nmi_count - old->nmi_count;
+
 	if (DO_BIC(BIC_SMI))
 		old->smi_count = new->smi_count - old->smi_count;
 
@@ -3519,6 +3542,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	t->instr_count = 0;
 
 	t->irq_count = 0;
+	t->nmi_count = 0;
 	t->smi_count = 0;
 
 	c->c3 = 0;
@@ -3623,6 +3647,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	average.threads.instr_count += t->instr_count;
 
 	average.threads.irq_count += t->irq_count;
+	average.threads.nmi_count += t->nmi_count;
 	average.threads.smi_count += t->smi_count;
 
 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
@@ -3764,6 +3789,9 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	if (average.threads.irq_count > 9999999)
 		sums_need_wide_columns = 1;
+	if (average.threads.nmi_count > 9999999)
+		sums_need_wide_columns = 1;
+
 
 	average.cores.c3 /= topo.allowed_cores;
 	average.cores.c6 /= topo.allowed_cores;
@@ -4620,6 +4648,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	if (DO_BIC(BIC_IRQ))
 		t->irq_count = irqs_per_cpu[cpu];
+	if (DO_BIC(BIC_NMI))
+		t->nmi_count = nmi_per_cpu[cpu];
 
 	get_cstate_counters(cpu, t, c, p);
 
@@ -5365,6 +5395,7 @@ void free_all_buffers(void)
 
 	free(irq_column_2_cpu);
 	free(irqs_per_cpu);
+	free(nmi_per_cpu);
 
 	for (i = 0; i <= topo.max_cpu_num; ++i) {
 		if (cpus[i].put_ids)
@@ -5821,31 +5852,37 @@ int snapshot_proc_interrupts(void)
 
 		irq_column_2_cpu[column] = cpu_number;
 		irqs_per_cpu[cpu_number] = 0;
+		nmi_per_cpu[cpu_number] = 0;
 	}
 
 	/* read /proc/interrupt count lines and sum up irqs per cpu */
 	while (1) {
 		int column;
 		char buf[64];
+		int this_row_is_nmi = 0;
 
-		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
+		retval = fscanf(fp, " %s:", buf);	/* irq# "N:" */
 		if (retval != 1)
 			break;
 
+		if (strncmp(buf, "NMI", strlen("NMI")) == 0)
+			this_row_is_nmi = 1;
+
 		/* read the count per cpu */
 		for (column = 0; column < topo.num_cpus; ++column) {
 
 			int cpu_number, irq_count;
 
 			retval = fscanf(fp, " %d", &irq_count);
+
 			if (retval != 1)
 				break;
 
 			cpu_number = irq_column_2_cpu[column];
 			irqs_per_cpu[cpu_number] += irq_count;
-
+			if (this_row_is_nmi)
+				nmi_per_cpu[cpu_number] += irq_count;
 		}
-
 		while (getc(fp) != '\n') ;	/* flush interrupt description */
 
 	}
@@ -5942,7 +5979,7 @@ int snapshot_sys_lpi_us(void)
  */
 int snapshot_proc_sysfs_files(void)
 {
-	if (DO_BIC(BIC_IRQ))
+	if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
 		if (snapshot_proc_interrupts())
 			return 1;
 
@@ -8263,6 +8300,7 @@ void process_cpuid()
 		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
 
 	BIC_PRESENT(BIC_IRQ);
+	BIC_PRESENT(BIC_NMI);
 	BIC_PRESENT(BIC_TSC_MHz);
 }
 
@@ -8613,7 +8651,11 @@ void allocate_irq_buffers(void)
 
 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
 	if (irqs_per_cpu == NULL)
-		err(-1, "calloc %d", topo.max_cpu_num + 1);
+		err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
+
+	nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+	if (nmi_per_cpu == NULL)
+		err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
 }
 
 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)

From 4a358ba215dfefe161b5904e51e48f5f0e82652f Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Wed, 18 Dec 2024 11:43:32 +0100
Subject: [PATCH 087/368] tools/power turbostat: Remove SysWatt from
 DISABLED_BY_DEFAULT

The counter is present on most supporting Intel platforms and provides
useful data to the user. There is no reason to disable the counter by
default.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 2 +-
 tools/power/x86/turbostat/turbostat.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 59b89e6b25bf0..f043a93defd4a 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -190,7 +190,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 .PP
 \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
 .PP
-\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default.  Enable with --enable SysWatt.
+\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS).
 .PP
 \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.  Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system.  Note that the meaning of this field is model specific.  For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits.  Comparing PkgWatt and PkgTmp to system limits is necessary.
 .PP
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 2620ed000ad07..1d99aaf9681b0 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -270,12 +270,12 @@ struct msr_counter bic[] = {
 #define	BIC_NMI			(1ULL << 61)
 
 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
 #define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
 #define BIC_OTHER ( BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
-#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J)
+#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

From 2f60f03934a50bc1fb69bb4f47a25cddd6807b0b Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Fri, 20 Dec 2024 13:38:34 +0100
Subject: [PATCH 088/368] tools/power turbostat: Fix PMT mmaped file size
 rounding

This (the old code) is just not how you round up to a page size.
Noticed on a recent Intel platform. Previous ones must have been
reporting sizes already aligned to a page and so the bug was missed when
testing.

Fixes: f0e4ed752fda ("tools/power turbostat: Add early support for PMT counters")
Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 1d99aaf9681b0..a2ca1c6c3638a 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -95,6 +95,8 @@
 #define INTEL_ECORE_TYPE	0x20
 #define INTEL_PCORE_TYPE	0x40
 
+#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
+
 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
@@ -8996,7 +8998,7 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
 		if (fd_pmt == -1)
 			goto loop_cleanup_and_break;
 
-		mmap_size = (size + 0x1000UL) & (~0x1000UL);
+		mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
 		mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
 		if (mmio != MAP_FAILED) {
 

From 3b7d93db450e9d8ead80d75e2a303248f1528c35 Mon Sep 17 00:00:00 2001
From: Antoine Viallon <antoine@lesviallon.fr>
Date: Tue, 14 Jan 2025 23:45:14 +0100
Subject: [PATCH 089/368] ceph: fix memory leak in ceph_mds_auth_match()

We now free the temporary target path substring allocation on every
possible branch, instead of omitting the default branch.  In some
cases, a memory leak occurred, which could rapidly crash the system
(depending on how many file accesses were attempted).

This was detected in production because it caused a continuous memory
growth, eventually triggering kernel OOM and completely hard-locking
the kernel.

Relevant kmemleak stacktrace:

    unreferenced object 0xffff888131e69900 (size 128):
      comm "git", pid 66104, jiffies 4295435999
      hex dump (first 32 bytes):
        76 6f 6c 75 6d 65 73 2f 63 6f 6e 74 61 69 6e 65  volumes/containe
        72 73 2f 67 69 74 65 61 2f 67 69 74 65 61 2f 67  rs/gitea/gitea/g
      backtrace (crc 2f3bb450):
        [<ffffffffaa68fb49>] __kmalloc_noprof+0x359/0x510
        [<ffffffffc32bf1df>] ceph_mds_check_access+0x5bf/0x14e0 [ceph]
        [<ffffffffc3235722>] ceph_open+0x312/0xd80 [ceph]
        [<ffffffffaa7dd786>] do_dentry_open+0x456/0x1120
        [<ffffffffaa7e3729>] vfs_open+0x79/0x360
        [<ffffffffaa832875>] path_openat+0x1de5/0x4390
        [<ffffffffaa834fcc>] do_filp_open+0x19c/0x3c0
        [<ffffffffaa7e44a1>] do_sys_openat2+0x141/0x180
        [<ffffffffaa7e4945>] __x64_sys_open+0xe5/0x1a0
        [<ffffffffac2cc2f7>] do_syscall_64+0xb7/0x210
        [<ffffffffac400130>] entry_SYSCALL_64_after_hwframe+0x77/0x7f

It can be triggered by mounting a subdirectory of a CephFS filesystem,
and then trying to access files on this subdirectory with an auth token
using a path-scoped capability:

    $ ceph auth get client.services
    [client.services]
            key = REDACTED
            caps mds = "allow rw fsname=cephfs path=/volumes/"
            caps mon = "allow r fsname=cephfs"
            caps osd = "allow rw tag cephfs data=cephfs"

    $ cat /proc/self/mounts
    services@[REDACTED].cephfs=/volumes/containers /ceph/containers ceph rw,noatime,name=services,secret=<hidden>,ms_mode=prefer-crc,mount_timeout=300,acl,mon_addr=[REDACTED]:3300,recover_session=clean 0 0

    $ seq 1 1000000 | xargs -P32 --replace={} touch /ceph/containers/file-{} && \
    seq 1 1000000 | xargs -P32 --replace={} cat /ceph/containers/file-{}

[ idryomov: combine if statements, rename rc to path_matched and make
            it a bool, formatting ]

Cc: stable@vger.kernel.org
Fixes: 596afb0b8933 ("ceph: add ceph_mds_check_access() helper")
Signed-off-by: Antoine Viallon <antoine@lesviallon.fr>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 785fe489ef4b8..ae37f0e24c996 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5690,18 +5690,18 @@ static int ceph_mds_auth_match(struct ceph_mds_client *mdsc,
 			 *
 			 * All the other cases                       --> mismatch
 			 */
+			bool path_matched = true;
 			char *first = strstr(_tpath, auth->match.path);
-			if (first != _tpath) {
-				if (free_tpath)
-					kfree(_tpath);
-				return 0;
+			if (first != _tpath ||
+			    (tlen > len && _tpath[len] != '/')) {
+				path_matched = false;
 			}
 
-			if (tlen > len && _tpath[len] != '/') {
-				if (free_tpath)
-					kfree(_tpath);
+			if (free_tpath)
+				kfree(_tpath);
+
+			if (!path_matched)
 				return 0;
-			}
 		}
 	}
 

From 5f4e6f7f8b77a3b1fb0005f6e1692475785ae05f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 12 Nov 2024 16:20:45 -0500
Subject: [PATCH 090/368] fs/overlayfs/namei.c: get rid of include
 ../internal.h

Added for the sake of vfs_path_lookup(), which is in linux/namei.h
these days.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/overlayfs/namei.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 7e27b7d4adee8..600046ebc2f3f 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -14,8 +14,6 @@
 #include <linux/exportfs.h>
 #include "overlayfs.h"
 
-#include "../internal.h"	/* for vfs_path_lookup */
-
 struct ovl_lookup_data {
 	struct super_block *sb;
 	const struct ovl_layer *layer;

From ac61506bf2d1a6766d98b7d94b0c7b2134a0806a Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 3 Jan 2025 17:37:05 +0000
Subject: [PATCH 091/368] rust: Use gendwarfksyms + extended modversions for
 CONFIG_MODVERSIONS

Previously, two things stopped Rust from using MODVERSIONS:
1. Rust symbols are occasionally too long to be represented in the
   original versions table
2. Rust types cannot be properly hashed by the existing genksyms
   approach because:
	* Looking up type definitions in Rust is more complex than C
	* Type layout is potentially dependent on the compiler in Rust,
	  not just the source type declaration.

CONFIG_EXTENDED_MODVERSIONS addresses the first point, and
CONFIG_GENDWARFKSYMS the second. If Rust wants to use MODVERSIONS, allow
it to do so by selecting both features.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Co-developed-by: Matthew Maurer <mmaurer@google.com>
Signed-off-by: Matthew Maurer <mmaurer@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 init/Kconfig  |  3 ++-
 rust/Makefile | 34 ++++++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index a20e6efd3f0fb..2cfbefe0933ed 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1956,7 +1956,8 @@ config RUST
 	bool "Rust support"
 	depends on HAVE_RUST
 	depends on RUST_IS_AVAILABLE
-	depends on !MODVERSIONS
+	select EXTENDED_MODVERSIONS if MODVERSIONS
+	depends on !MODVERSIONS || GENDWARFKSYMS
 	depends on !GCC_PLUGIN_RANDSTRUCT
 	depends on !RANDSTRUCT
 	depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
diff --git a/rust/Makefile b/rust/Makefile
index a40a3936126d6..ab300bfb46f6a 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -329,10 +329,11 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ;
 $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE
 	$(call if_changed_dep,bindgen)
 
+rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }'
+
 quiet_cmd_exports = EXPORTS $@
       cmd_exports = \
-	$(NM) -p --defined-only $< \
-		| awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@
+	$(call rust_exports,$<,"EXPORT_SYMBOL_RUST_GPL(%s);\n") > $@
 
 $(obj)/exports_core_generated.h: $(obj)/core.o FORCE
 	$(call if_changed,exports)
@@ -401,11 +402,36 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),)
 		__ashlti3 __lshrti3
 endif
 
+ifdef CONFIG_MODVERSIONS
+cmd_gendwarfksyms = $(if $(skip_gendwarfksyms),, \
+	$(call rust_exports,$@,"%s\n") | \
+	scripts/gendwarfksyms/gendwarfksyms \
+		$(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) \
+		$(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes),) \
+		$@ >> $(dot-target).cmd)
+endif
+
 define rule_rustc_library
 	$(call cmd_and_fixdep,rustc_library)
 	$(call cmd,gen_objtooldep)
+	$(call cmd,gendwarfksyms)
 endef
 
+define rule_rust_cc_library
+	$(call if_changed_rule,cc_o_c)
+	$(call cmd,force_checksrc)
+	$(call cmd,gendwarfksyms)
+endef
+
+# helpers.o uses the same export mechanism as Rust libraries, so ensure symbol
+# versions are calculated for the helpers too.
+$(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE
+	+$(call if_changed_rule,rust_cc_library)
+
+# Disable symbol versioning for exports.o to avoid conflicts with the actual
+# symbol versions generated from Rust objects.
+$(obj)/exports.o: private skip_gendwarfksyms = 1
+
 $(obj)/core.o: private skip_clippy = 1
 $(obj)/core.o: private skip_flags = -Wunreachable_pub
 $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym))
@@ -417,13 +443,16 @@ ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
 $(obj)/core.o: scripts/target.json
 endif
 
+$(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1
 $(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*'
 $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE
 	+$(call if_changed_rule,rustc_library)
 
+$(obj)/build_error.o: private skip_gendwarfksyms = 1
 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE
 	+$(call if_changed_rule,rustc_library)
 
+$(obj)/ffi.o: private skip_gendwarfksyms = 1
 $(obj)/ffi.o: $(src)/ffi.rs $(obj)/compiler_builtins.o FORCE
 	+$(call if_changed_rule,rustc_library)
 
@@ -435,6 +464,7 @@ $(obj)/bindings.o: $(src)/bindings/lib.rs \
 	+$(call if_changed_rule,rustc_library)
 
 $(obj)/uapi.o: private rustc_target_flags = --extern ffi
+$(obj)/uapi.o: private skip_gendwarfksyms = 1
 $(obj)/uapi.o: $(src)/uapi/lib.rs \
     $(obj)/ffi.o \
     $(obj)/uapi/uapi_generated.rs FORCE

From 3b7f793acc13b6108452271b306d4aa94a3c4940 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Dec 2024 09:15:00 +0900
Subject: [PATCH 092/368] ARC: migrate to the generic rule for built-in DTB

Commit 654102df2ac2 ("kbuild: add generic support for built-in boot
DTBs") introduced generic support for built-in DTBs.

Select GENERIC_BUILTIN_DTB to use the generic rule.

To keep consistency across architectures, this commit also renames
CONFIG_ARC_BUILTIN_DTB_NAME to CONFIG_BUILTIN_DTB_NAME.

Now, "nsim_700" is the default value for CONFIG_BUILTIN_DTB_NAME, rather
than a fallback in case it is empty.

Acked-by: Vineet Gupta <vgupta@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 arch/arc/Kconfig                           | 7 ++++---
 arch/arc/Makefile                          | 3 ---
 arch/arc/boot/dts/Makefile                 | 9 +--------
 arch/arc/configs/axs101_defconfig          | 2 +-
 arch/arc/configs/axs103_defconfig          | 2 +-
 arch/arc/configs/axs103_smp_defconfig      | 2 +-
 arch/arc/configs/haps_hs_defconfig         | 2 +-
 arch/arc/configs/haps_hs_smp_defconfig     | 2 +-
 arch/arc/configs/hsdk_defconfig            | 2 +-
 arch/arc/configs/nsim_700_defconfig        | 2 +-
 arch/arc/configs/nsimosci_defconfig        | 2 +-
 arch/arc/configs/nsimosci_hs_defconfig     | 2 +-
 arch/arc/configs/nsimosci_hs_smp_defconfig | 2 +-
 arch/arc/configs/tb10x_defconfig           | 2 +-
 arch/arc/configs/vdk_hs38_defconfig        | 2 +-
 arch/arc/configs/vdk_hs38_smp_defconfig    | 2 +-
 16 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 4f2eeda907ecb..0f813653658b4 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -18,6 +18,7 @@ config ARC
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
 	select ARCH_32BIT_OFF_T
 	select BUILDTIME_TABLE_SORT
+	select GENERIC_BUILTIN_DTB
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select DMA_DIRECT_REMAP
@@ -551,11 +552,11 @@ config ARC_DBG_JUMP_LABEL
 	  part of static keys (jump labels) related code.
 endif
 
-config ARC_BUILTIN_DTB_NAME
+config BUILTIN_DTB_NAME
 	string "Built in DTB"
+	default "nsim_700"
 	help
-	  Set the name of the DTB to embed in the vmlinux binary
-	  Leaving it blank selects the "nsim_700" dtb.
+	  Set the name of the DTB to embed in the vmlinux binary.
 
 endmenu	 # "ARC Architecture Configuration"
 
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index fb98478ed1ab0..0c5e6e6314f29 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -82,9 +82,6 @@ KBUILD_CFLAGS	+= $(cflags-y)
 KBUILD_AFLAGS	+= $(KBUILD_CFLAGS)
 KBUILD_LDFLAGS	+= $(ldflags-y)
 
-# w/o this dtb won't embed into kernel binary
-core-y		+= arch/arc/boot/dts/
-
 core-y				+= arch/arc/plat-sim/
 core-$(CONFIG_ARC_PLAT_TB10X)	+= arch/arc/plat-tb10x/
 core-$(CONFIG_ARC_PLAT_AXS10X)	+= arch/arc/plat-axs10x/
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index 48704dfdf75cb..ee5664f0640d5 100644
--- a/arch/arc/boot/dts/Makefile
+++ b/arch/arc/boot/dts/Makefile
@@ -1,13 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-# Built-in dtb
-builtindtb-y		:= nsim_700
 
-ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),)
-	builtindtb-y	:= $(CONFIG_ARC_BUILTIN_DTB_NAME)
-endif
-
-obj-y   += $(builtindtb-y).dtb.o
-dtb-y := $(builtindtb-y).dtb
+dtb-y	:= $(addsuffix .dtb, $(CONFIG_BUILTIN_DTB_NAME))
 
 # for CONFIG_OF_ALL_DTBS test
 dtb-	:= $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts))
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 319bbe2703223..a7cd526dd7ca3 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -23,7 +23,7 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS101=y
 CONFIG_ARC_CACHE_LINE_SHIFT=5
-CONFIG_ARC_BUILTIN_DTB_NAME="axs101"
+CONFIG_BUILTIN_DTB_NAME="axs101"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 8c1f1a111a175..afa6a348f4445 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -22,7 +22,7 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
-CONFIG_ARC_BUILTIN_DTB_NAME="axs103"
+CONFIG_BUILTIN_DTB_NAME="axs103"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 75cab9f25b5bb..2bfa6371953cc 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -22,7 +22,7 @@ CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
-CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu"
+CONFIG_BUILTIN_DTB_NAME="axs103_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index 8c3ed5d6e6c35..3a15771120782 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -14,7 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs"
+CONFIG_BUILTIN_DTB_NAME="haps_hs"
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_COMPACTION is not set
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index 6fc98c1b9b368..a3cf940b1f5b4 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -16,7 +16,7 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SMP=y
-CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu"
+CONFIG_BUILTIN_DTB_NAME="haps_hs_idu"
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 9e79154b5535a..1558e8e87767e 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -20,7 +20,7 @@ CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 CONFIG_LINUX_LINK_BASE=0x90000000
 CONFIG_LINUX_RAM_BASE=0x80000000
-CONFIG_ARC_BUILTIN_DTB_NAME="hsdk"
+CONFIG_BUILTIN_DTB_NAME="hsdk"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 51092c39e3607..f8b3235d9a65e 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -17,7 +17,7 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_ISA_ARCOMPACT=y
-CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
+CONFIG_BUILTIN_DTB_NAME="nsim_700"
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 70c17bca49397..ee45dc0877fbc 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -19,7 +19,7 @@ CONFIG_ISA_ARCOMPACT=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
+CONFIG_BUILTIN_DTB_NAME="nsimosci"
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 59a3b6642fe71..e0a309970c20b 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -19,7 +19,7 @@ CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_ISA_ARCV2=y
-CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
+CONFIG_BUILTIN_DTB_NAME="nsimosci_hs"
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 1419fc946a083..88325b8b49cf4 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -16,7 +16,7 @@ CONFIG_MODULES=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 # CONFIG_ARC_TIMERS_64BIT is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
+CONFIG_BUILTIN_DTB_NAME="nsimosci_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 5aba3d850fa2f..865fbc19ef031 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -26,7 +26,7 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_ARC_PLAT_TB10X=y
 CONFIG_ARC_CACHE_LINE_SHIFT=5
 CONFIG_HZ=250
-CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
+CONFIG_BUILTIN_DTB_NAME="abilis_tb100_dvk"
 CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index 50c3439138257..03d9ac20baa98 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -13,7 +13,7 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
-CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38"
+CONFIG_BUILTIN_DTB_NAME="vdk_hs38"
 CONFIG_PREEMPT=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index 6d9e1d9f71d21..c09488992f131 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -15,7 +15,7 @@ CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 # CONFIG_ARC_TIMERS_64BIT is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
+CONFIG_BUILTIN_DTB_NAME="vdk_hs38_smp"
 CONFIG_PREEMPT=y
 CONFIG_NET=y
 CONFIG_PACKET=y

From 25ff08aa43e373a61c3e36fc7d7cae88ed0fc2d7 Mon Sep 17 00:00:00 2001
From: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Date: Mon, 13 Jan 2025 07:01:29 +0100
Subject: [PATCH 093/368] kbuild: Fix signing issue for external modules

When running the sign script the kernel is within the source directory
of external modules. This caused issues when the kernel uses relative
paths, like:

make[5]: Entering directory '/build/client/devel/kernel/work/linux-2.6'
make[6]: Entering directory '/build/client/devel/addmodules/vtx/work/vtx'
   INSTALL /build/client/devel/addmodules/vtx/_/lib/modules/6.13.0-devel+/extra/vtx.ko
   SIGN    /build/client/devel/addmodules/vtx/_/lib/modules/6.13.0-devel+/extra/vtx.ko
/bin/sh: 1: scripts/sign-file: not found
   DEPMOD  /build/client/devel/addmodules/vtx/_/lib/modules/6.13.0-devel+

Work around it by using absolute paths here.

Fixes: 13b25489b6f8 ("kbuild: change working directory to external module directory with M=")
Signed-off-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/Makefile.modinst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index f97c9926ed31b..1628198f3e830 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -105,7 +105,7 @@ else
 sig-key := $(CONFIG_MODULE_SIG_KEY)
 endif
 quiet_cmd_sign = SIGN    $@
-      cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \
+      cmd_sign = $(objtree)/scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" $(objtree)/certs/signing_key.x509 $@ \
                  $(if $(KBUILD_EXTMOD),|| true)
 
 ifeq ($(sign-only),)

From 015b0bfe754ae157cfccd7a13c41391124b115f2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:39 +0900
Subject: [PATCH 094/368] genksyms: rename m_abstract_declarator to
 abstract_declarator

This is called "abstract-declarator" in K&R. [1]

I am not sure what "m_" stands for, but the name is clear enough
without it.

No functional changes are intended.

[1] https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 689cb6bb40b65..02f2f713ec5a4 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -367,17 +367,17 @@ parameter_declaration_list:
 	;
 
 parameter_declaration:
-	decl_specifier_seq m_abstract_declarator
+	decl_specifier_seq abstract_declarator
 		{ $$ = $2 ? $2 : $1; }
 	;
 
-m_abstract_declarator:
-	ptr_operator m_abstract_declarator
+abstract_declarator:
+	ptr_operator abstract_declarator
 		{ $$ = $2 ? $2 : $1; }
-	| direct_m_abstract_declarator
+	| direct_abstract_declarator
 	;
 
-direct_m_abstract_declarator:
+direct_abstract_declarator:
 	/* empty */					{ $$ = NULL; }
 	| IDENT
 		{ /* For version 2 checksums, we don't want to remember
@@ -391,13 +391,13 @@ direct_m_abstract_declarator:
 		{ remove_node($1);
 		  $$ = $1;
 		}
-	| direct_m_abstract_declarator '(' parameter_declaration_clause ')'
+	| direct_abstract_declarator '(' parameter_declaration_clause ')'
 		{ $$ = $4; }
-	| direct_m_abstract_declarator '(' error ')'
+	| direct_abstract_declarator '(' error ')'
 		{ $$ = $4; }
-	| direct_m_abstract_declarator BRACKET_PHRASE
+	| direct_abstract_declarator BRACKET_PHRASE
 		{ $$ = $2; }
-	| '(' m_abstract_declarator ')'
+	| '(' abstract_declarator ')'
 		{ $$ = $3; }
 	| '(' error ')'
 		{ $$ = $3; }

From f33bfbd171a03c5d3f64ce956ccdfbece7114da4 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:40 +0900
Subject: [PATCH 095/368] genksyms: rename cvar_qualifier to type_qualifier

I believe "cvar" stands for "Const, Volatile, Attribute, or Restrict".

This is called "type-qualifier" in K&R. [1]

Adopt this more generic naming.

No functional changes are intended.

[1] https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 02f2f713ec5a4..8f62b9f0d99c4 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -223,7 +223,7 @@ storage_class_specifier:
 
 type_specifier:
 	simple_type_specifier
-	| cvar_qualifier
+	| type_qualifier
 	| TYPEOF_KEYW '(' parameter_declaration ')'
 	| TYPEOF_PHRASE
 
@@ -270,21 +270,21 @@ simple_type_specifier:
 	;
 
 ptr_operator:
-	'*' cvar_qualifier_seq_opt
+	'*' type_qualifier_seq_opt
 		{ $$ = $2 ? $2 : $1; }
 	;
 
-cvar_qualifier_seq_opt:
+type_qualifier_seq_opt:
 	/* empty */					{ $$ = NULL; }
-	| cvar_qualifier_seq
+	| type_qualifier_seq
 	;
 
-cvar_qualifier_seq:
-	cvar_qualifier
-	| cvar_qualifier_seq cvar_qualifier		{ $$ = $2; }
+type_qualifier_seq:
+	type_qualifier
+	| type_qualifier_seq type_qualifier		{ $$ = $2; }
 	;
 
-cvar_qualifier:
+type_qualifier:
 	CONST_KEYW | VOLATILE_KEYW | ATTRIBUTE_PHRASE
 	| RESTRICT_KEYW
 		{ /* restrict has no effect in prototypes so ignore it */

From bc3a812b751ae1a4d91b3ea667ed77e76398bf46 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:41 +0900
Subject: [PATCH 096/368] genksyms: reduce type_qualifier directly to
 decl_specifier

A type_qualifier (const, volatile, etc.) is not a type_specifier.

According to K&R [1], a type-qualifier should be directly reduced to
a declaration-specifier.

  <declaration-specifier> ::= <storage-class-specifier>
                            | <type-specifier>
                            | <type-qualifier>

[1]: https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 8f62b9f0d99c4..20cb3db7f149b 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -211,6 +211,7 @@ decl_specifier:
 		  $$ = $1;
 		}
 	| type_specifier
+	| type_qualifier
 	;
 
 storage_class_specifier:
@@ -223,7 +224,6 @@ storage_class_specifier:
 
 type_specifier:
 	simple_type_specifier
-	| type_qualifier
 	| TYPEOF_KEYW '(' parameter_declaration ')'
 	| TYPEOF_PHRASE
 

From 3ccda63a3af5f12c9e0b01c06561285227d2f79c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:42 +0900
Subject: [PATCH 097/368] genksyms: fix 6 shift/reduce conflicts and 5
 reduce/reduce conflicts

The genksyms parser has ambiguities in its grammar, which are currently
suppressed by a workaround in scripts/genksyms/Makefile.

Building genksyms with W=1 generates the following warnings:

    YACC    scripts/genksyms/parse.tab.[ch]
  scripts/genksyms/parse.y: warning: 9 shift/reduce conflicts [-Wconflicts-sr]
  scripts/genksyms/parse.y: warning: 5 reduce/reduce conflicts [-Wconflicts-rr]
  scripts/genksyms/parse.y: note: rerun with option '-Wcounterexamples' to generate conflict counterexamples

The comment in the parser describes the current problem:

    /* This wasn't really a typedef name but an identifier that
       shadows one.  */

Consider the following simple C code:

    typedef int foo;
    void my_func(foo foo) {}

In the function parameter list (foo foo), the first 'foo' is a type
specifier (typedef'ed as 'int'), while the second 'foo' is an identifier.

However, the lexer cannot distinguish between the two. Since 'foo' is
already typedef'ed, the lexer returns TYPE for both instances, instead
of returning IDENT for the second one.

To support shadowed identifiers, TYPE can be reduced to either a
simple_type_specifier or a direct_abstract_declarator, which creates
a grammatical ambiguity.

Without analyzing the grammar context, it is very difficult to resolve
this correctly.

This commit introduces a flag, dont_want_type_specifier, which allows
the parser to inform the lexer whether an identifier is expected. When
dont_want_type_specifier is true, the type lookup is suppressed, and
the lexer returns IDENT regardless of any preceding typedef.

After this commit, only 3 shift/reduce conflicts will remain.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/genksyms.h |  3 +++
 scripts/genksyms/lex.l      |  9 ++++++++-
 scripts/genksyms/parse.y    | 37 +++++++++++++++----------------------
 3 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 8c45ada59ece5..0c355075f0e67 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -12,6 +12,7 @@
 #ifndef MODUTILS_GENKSYMS_H
 #define MODUTILS_GENKSYMS_H 1
 
+#include <stdbool.h>
 #include <stdio.h>
 
 #include <list_types.h>
@@ -66,6 +67,8 @@ struct string_list *copy_list_range(struct string_list *start,
 int yylex(void);
 int yyparse(void);
 
+extern bool dont_want_type_specifier;
+
 void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
 
 /*----------------------------------------------------------------------*/
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
index a4d7495eaf75e..e886133af5783 100644
--- a/scripts/genksyms/lex.l
+++ b/scripts/genksyms/lex.l
@@ -12,6 +12,7 @@
 %{
 
 #include <limits.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
@@ -113,6 +114,12 @@ MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
 /* The second stage lexer.  Here we incorporate knowledge of the state
    of the parser to tailor the tokens that are returned.  */
 
+/*
+ * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an
+ * IDENT. We need a hint from the parser to handle this accurately.
+ */
+bool dont_want_type_specifier;
+
 int
 yylex(void)
 {
@@ -207,7 +214,7 @@ repeat:
 		    goto repeat;
 		  }
 	      }
-	    if (!suppress_type_lookup)
+	    if (!suppress_type_lookup && !dont_want_type_specifier)
 	      {
 		if (find_symbol(yytext, SYM_TYPEDEF, 1))
 		  token = TYPE;
diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 20cb3db7f149b..dc575d467bbfe 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -12,6 +12,7 @@
 %{
 
 #include <assert.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 #include "genksyms.h"
@@ -148,6 +149,7 @@ simple_declaration:
 		    current_name = NULL;
 		  }
 		  $$ = $3;
+		  dont_want_type_specifier = false;
 		}
 	;
 
@@ -169,6 +171,7 @@ init_declarator_list:
 			     is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern);
 		  current_name = NULL;
 		  $$ = $1;
+		  dont_want_type_specifier = true;
 		}
 	| init_declarator_list ',' init_declarator
 		{ struct string_list *decl = *$3;
@@ -184,6 +187,7 @@ init_declarator_list:
 			     is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern);
 		  current_name = NULL;
 		  $$ = $3;
+		  dont_want_type_specifier = true;
 		}
 	;
 
@@ -210,7 +214,7 @@ decl_specifier:
 		  remove_node($1);
 		  $$ = $1;
 		}
-	| type_specifier
+	| type_specifier	{ dont_want_type_specifier = true; $$ = $1; }
 	| type_qualifier
 	;
 
@@ -307,15 +311,7 @@ direct_declarator:
 		    current_name = (*$1)->string;
 		    $$ = $1;
 		  }
-		}
-	| TYPE
-		{ if (current_name != NULL) {
-		    error_with_pos("unexpected second declaration name");
-		    YYERROR;
-		  } else {
-		    current_name = (*$1)->string;
-		    $$ = $1;
-		  }
+		  dont_want_type_specifier = false;
 		}
 	| direct_declarator '(' parameter_declaration_clause ')'
 		{ $$ = $4; }
@@ -335,8 +331,7 @@ nested_declarator:
 	;
 
 direct_nested_declarator:
-	IDENT
-	| TYPE
+	IDENT	{ $$ = $1; dont_want_type_specifier = false; }
 	| direct_nested_declarator '(' parameter_declaration_clause ')'
 		{ $$ = $4; }
 	| direct_nested_declarator '(' error ')'
@@ -362,8 +357,9 @@ parameter_declaration_list_opt:
 
 parameter_declaration_list:
 	parameter_declaration
+		{ $$ = $1; dont_want_type_specifier = false; }
 	| parameter_declaration_list ',' parameter_declaration
-		{ $$ = $3; }
+		{ $$ = $3; dont_want_type_specifier = false; }
 	;
 
 parameter_declaration:
@@ -375,6 +371,7 @@ abstract_declarator:
 	ptr_operator abstract_declarator
 		{ $$ = $2 ? $2 : $1; }
 	| direct_abstract_declarator
+		{ $$ = $1; dont_want_type_specifier = false; }
 	;
 
 direct_abstract_declarator:
@@ -385,12 +382,6 @@ direct_abstract_declarator:
 		  remove_node($1);
 		  $$ = $1;
 		}
-	/* This wasn't really a typedef name but an identifier that
-	   shadows one.  */
-	| TYPE
-		{ remove_node($1);
-		  $$ = $1;
-		}
 	| direct_abstract_declarator '(' parameter_declaration_clause ')'
 		{ $$ = $4; }
 	| direct_abstract_declarator '(' error ')'
@@ -440,9 +431,9 @@ member_specification:
 
 member_declaration:
 	decl_specifier_seq_opt member_declarator_list_opt ';'
-		{ $$ = $3; }
+		{ $$ = $3; dont_want_type_specifier = false; }
 	| error ';'
-		{ $$ = $2; }
+		{ $$ = $2; dont_want_type_specifier = false; }
 	;
 
 member_declarator_list_opt:
@@ -452,7 +443,9 @@ member_declarator_list_opt:
 
 member_declarator_list:
 	member_declarator
-	| member_declarator_list ',' member_declarator	{ $$ = $3; }
+		{ $$ = $1; dont_want_type_specifier = true; }
+	| member_declarator_list ',' member_declarator
+		{ $$ = $3; dont_want_type_specifier = true; }
 	;
 
 member_declarator:

From 668de2b9d48dccdc1b992e07287f15459515fefb Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:43 +0900
Subject: [PATCH 098/368] genksyms: fix last 3 shift/reduce conflicts

The genksyms parser has ambiguities in its grammar, which are currently
suppressed by a workaround in scripts/genksyms/Makefile.

Building genksyms with W=1 generates the following warnings:

    YACC    scripts/genksyms/parse.tab.[ch]
  scripts/genksyms/parse.y: warning: 3 shift/reduce conflicts [-Wconflicts-sr]
  scripts/genksyms/parse.y: note: rerun with option '-Wcounterexamples' to generate conflict counterexamples

The ambiguity arises when decl_specifier_seq is followed by '(' because
the following two interpretations are possible:

  - decl_specifier_seq direct_abstract_declarator '(' parameter_declaration_clause ')'
  - decl_specifier_seq '(' abstract_declarator ')'

This issue occurs because the current parser allows an empty string to
be reduced to direct_abstract_declarator, which is incorrect.

K&R [1] explains the correct grammar:

    <parameter-declaration> ::= {<declaration-specifier>}+ <declarator>
                              | {<declaration-specifier>}+ <abstract-declarator>
                              | {<declaration-specifier>}+

    <abstract-declarator> ::= <pointer>
                            | <pointer> <direct-abstract-declarator>
                            | <direct-abstract-declarator>

    <direct-abstract-declarator> ::=  ( <abstract-declarator> )
                                   | {<direct-abstract-declarator>}? [ {<constant-expression>}? ]
                                   | {<direct-abstract-declarator>}? ( {<parameter-type-list>}? )

This commit resolves all remaining conflicts.

We need to consider the difference between the following two examples:

[Example 1] ( <abstract-declarator> ) can become <direct-abstract-declarator>

        void my_func(int (foo));

    ... is equivalent to:

        void my_func(int foo);

[Example 2] ( <parameter-type-list> ) can become <direct-abstract-declarator>

        typedef int foo;
        void my_func(int (foo));

    ... is equivalent to:

        void my_func(int (*callback)(int));

Please note that the function declaration is identical in both examples,
but the preceding typedef creates the distinction. I introduced a new
term, open_paren, to enable the type lookup immediately after the '('
token. Without this, we cannot distinguish between [Example 1] and
[Example 2].

[1]: https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index dc575d467bbfe..fafce939c32f7 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -363,35 +363,47 @@ parameter_declaration_list:
 	;
 
 parameter_declaration:
-	decl_specifier_seq abstract_declarator
+	decl_specifier_seq abstract_declarator_opt
 		{ $$ = $2 ? $2 : $1; }
 	;
 
+abstract_declarator_opt:
+	/* empty */				{ $$ = NULL; }
+	| abstract_declarator
+	;
+
 abstract_declarator:
-	ptr_operator abstract_declarator
+	ptr_operator
+	| ptr_operator abstract_declarator
 		{ $$ = $2 ? $2 : $1; }
 	| direct_abstract_declarator
 		{ $$ = $1; dont_want_type_specifier = false; }
 	;
 
 direct_abstract_declarator:
-	/* empty */					{ $$ = NULL; }
-	| IDENT
+	  IDENT
 		{ /* For version 2 checksums, we don't want to remember
 		     private parameter names.  */
 		  remove_node($1);
 		  $$ = $1;
 		}
-	| direct_abstract_declarator '(' parameter_declaration_clause ')'
+	| direct_abstract_declarator open_paren parameter_declaration_clause ')'
 		{ $$ = $4; }
-	| direct_abstract_declarator '(' error ')'
+	| direct_abstract_declarator open_paren error ')'
 		{ $$ = $4; }
 	| direct_abstract_declarator BRACKET_PHRASE
 		{ $$ = $2; }
-	| '(' abstract_declarator ')'
+	| open_paren parameter_declaration_clause ')'
 		{ $$ = $3; }
-	| '(' error ')'
+	| open_paren abstract_declarator ')'
 		{ $$ = $3; }
+	| open_paren error ')'
+		{ $$ = $3; }
+	| BRACKET_PHRASE
+	;
+
+open_paren:
+	'('	{ $$ = $1; dont_want_type_specifier = false; }
 	;
 
 function_definition:

From a95298656c434357b38bec242412c65dcf6114d1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:44 +0900
Subject: [PATCH 099/368] genksyms: remove Makefile hack

This workaround was introduced for suppressing the reduce/reduce conflict
warnings because the %expect-rr directive, which is applicable only to GLR
parsers, cannot be used for genksyms.

Since there are no longer any conflicts, this Makefile hack is now
unnecessary.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/Makefile | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile
index 312edccda7363..4350311fb7b39 100644
--- a/scripts/genksyms/Makefile
+++ b/scripts/genksyms/Makefile
@@ -4,24 +4,6 @@ hostprogs-always-y	+= genksyms
 
 genksyms-objs	:= genksyms.o parse.tab.o lex.lex.o
 
-# FIXME: fix the ambiguous grammar in parse.y and delete this hack
-#
-# Suppress shift/reduce, reduce/reduce conflicts warnings
-# unless W=1 is specified.
-#
-# Just in case, run "$(YACC) --version" without suppressing stderr
-# so that 'bison: not found' will be displayed if it is missing.
-ifeq ($(findstring 1,$(KBUILD_EXTRA_WARN)),)
-
-quiet_cmd_bison_no_warn = $(quiet_cmd_bison)
-      cmd_bison_no_warn = $(YACC) --version >/dev/null; \
-			  $(cmd_bison) 2>/dev/null
-
-$(obj)/pars%.tab.c $(obj)/pars%.tab.h: $(src)/pars%.y FORCE
-	$(call if_changed,bison_no_warn)
-
-endif
-
 # -I needed for generated C source to include headers in source tree
 HOSTCFLAGS_parse.tab.o := -I $(src)
 HOSTCFLAGS_lex.lex.o := -I $(src)

From c2f1846ba87ead7ac544be624c13249d6b90eca0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:45 +0900
Subject: [PATCH 100/368] genksyms: restrict direct-abstract-declarator to take
 one parameter-type-list

While there is no more grammatical ambiguity in genksyms, the parser
logic is still inaccurate.

For example, genksyms accepts the following invalid C code:

    void my_func(int ()(int));

This should result in a syntax error because () cannot be reduced to
<direct-abstract-declarator>.

( <abstract-declarator> ) can be reduced, but <abstract-declarator>
must not be empty in the following grammar from K&R [1]:

  <direct-abstract-declarator> ::=  ( <abstract-declarator> )
                                 | {<direct-abstract-declarator>}? [ {<constant-expression>}? ]
                                 | {<direct-abstract-declarator>}? ( {<parameter-type-list>}? )

Furthermore, genksyms accepts the following weird code:

    void my_func(int (*callback)(int)(int)(int));

The parser allows <direct-abstract-declarator> to recursively absorb
multiple ( {<parameter-type-list>}? ), but this behavior is incorrect.

In the example above, (*callback) should be followed by at most one
(int).

[1]: https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index fafce939c32f7..03cdd8d53c13a 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -381,20 +381,24 @@ abstract_declarator:
 	;
 
 direct_abstract_declarator:
+	direct_abstract_declarator1
+	| direct_abstract_declarator1 open_paren parameter_declaration_clause ')'
+		{ $$ = $4; }
+	| open_paren parameter_declaration_clause ')'
+		{ $$ = $3; }
+	;
+
+direct_abstract_declarator1:
 	  IDENT
 		{ /* For version 2 checksums, we don't want to remember
 		     private parameter names.  */
 		  remove_node($1);
 		  $$ = $1;
 		}
-	| direct_abstract_declarator open_paren parameter_declaration_clause ')'
-		{ $$ = $4; }
-	| direct_abstract_declarator open_paren error ')'
+	| direct_abstract_declarator1 open_paren error ')'
 		{ $$ = $4; }
-	| direct_abstract_declarator BRACKET_PHRASE
+	| direct_abstract_declarator1 BRACKET_PHRASE
 		{ $$ = $2; }
-	| open_paren parameter_declaration_clause ')'
-		{ $$ = $3; }
 	| open_paren abstract_declarator ')'
 		{ $$ = $3; }
 	| open_paren error ')'

From aa710cee0d677043f49a447c4665df51a553a2ba Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:46 +0900
Subject: [PATCH 101/368] genksyms: restrict direct-declarator to take one
 parameter-type-list

Similar to the previous commit, this change makes the parser logic a
little more accurate.

Currently, genksyms accepts the following invalid code:

    struct foo {
            int (*callback)(int)(int)(int);
    };

A direct-declarator should not recursively absorb multiple
( parameter-type-list ) constructs.

In the example above, (*callback) should be followed by at most one
(int).

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 03cdd8d53c13a..33a6aab53b69d 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -331,12 +331,16 @@ nested_declarator:
 	;
 
 direct_nested_declarator:
-	IDENT	{ $$ = $1; dont_want_type_specifier = false; }
-	| direct_nested_declarator '(' parameter_declaration_clause ')'
+	direct_nested_declarator1
+	| direct_nested_declarator1 '(' parameter_declaration_clause ')'
 		{ $$ = $4; }
-	| direct_nested_declarator '(' error ')'
+	;
+
+direct_nested_declarator1:
+	IDENT	{ $$ = $1; dont_want_type_specifier = false; }
+	| direct_nested_declarator1 '(' error ')'
 		{ $$ = $4; }
-	| direct_nested_declarator BRACKET_PHRASE
+	| direct_nested_declarator1 BRACKET_PHRASE
 		{ $$ = $2; }
 	| '(' nested_declarator ')'
 		{ $$ = $3; }

From ccc11a195c69b0c01ee140aecadfbdcdcdd03605 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:47 +0900
Subject: [PATCH 102/368] genksyms: record attributes consistently for
 init-declarator

I believe the missing action here is a bug.

For rules with no explicit action, the following default is used:

    { $$ = $1; }

However, in this case, $1 is the value of attribute_opt itself. As a
result, the value of attribute_opt is always NULL.

The following test code demonstrates inconsistent behavior.

    int x __attribute__((__aligned__(4)));
    int y __attribute__((__aligned__(4))) = 0;

The attribute is recorded only when followed by an initializer.

This commit adds the correct action to propagate the value of the
ATTRIBUTE_PHRASE token.

With this change, the attribute in the example above is consistently
recorded for both 'x' and 'y'.

[Before]

    $ cat <<EOF | scripts/genksyms/genksyms -d
    int x __attribute__((__aligned__(4)));
    int y __attribute__((__aligned__(4))) = 0;
    EOF
    Defn for type0 x == <int x >
    Defn for type0 y == <int y __attribute__ ( ( __aligned__ ( 4 ) ) ) >
    Hash table occupancy 2/4096 = 0.000488281

[After]

    $ cat <<EOF | scripts/genksyms/genksyms -d
    int x __attribute__((__aligned__(4)));
    int y __attribute__((__aligned__(4))) = 0;
    EOF
    Defn for type0 x == <int x __attribute__ ( ( __aligned__ ( 4 ) ) ) >
    Defn for type0 y == <int y __attribute__ ( ( __aligned__ ( 4 ) ) ) >
    Hash table occupancy 2/4096 = 0.000488281

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 33a6aab53b69d..e3c1600461436 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -480,7 +480,7 @@ member_bitfield_declarator:
 
 attribute_opt:
 	/* empty */					{ $$ = NULL; }
-	| attribute_opt ATTRIBUTE_PHRASE
+	| attribute_opt ATTRIBUTE_PHRASE		{ $$ = $2; }
 	;
 
 enum_body:

From ec28bfff83c49b65527f0055e313d9d7c8c04a31 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:48 +0900
Subject: [PATCH 103/368] genksyms: decouple ATTRIBUTE_PHRASE from
 type-qualifier

The __attribute__ keyword can appear in more contexts than 'const' or
'volatile'.

To avoid grammatical conflicts with future changes, ATTRIBUTE_PHRASE
should not be reduced into type_qualifier.

No functional changes are intended.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index e3c1600461436..cd933a95548d6 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -216,6 +216,7 @@ decl_specifier:
 		}
 	| type_specifier	{ dont_want_type_specifier = true; $$ = $1; }
 	| type_qualifier
+	| ATTRIBUTE_PHRASE
 	;
 
 storage_class_specifier:
@@ -285,11 +286,13 @@ type_qualifier_seq_opt:
 
 type_qualifier_seq:
 	type_qualifier
+	| ATTRIBUTE_PHRASE
 	| type_qualifier_seq type_qualifier		{ $$ = $2; }
+	| type_qualifier_seq ATTRIBUTE_PHRASE		{ $$ = $2; }
 	;
 
 type_qualifier:
-	CONST_KEYW | VOLATILE_KEYW | ATTRIBUTE_PHRASE
+	CONST_KEYW | VOLATILE_KEYW
 	| RESTRICT_KEYW
 		{ /* restrict has no effect in prototypes so ignore it */
 		  remove_node($1);

From 2966b66c94a2b0d897f8626b8f2c50a0fd4878a9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:49 +0900
Subject: [PATCH 104/368] genksyms: fix syntax error for attribute before
 abstract_declarator

A longstanding issue with genksyms is that it has hidden syntax errors.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

For example, with CONFIG_MODVERSIONS=y on v6.13-rc1:

    $ make -s KCFLAGS=-D__GENKSYMS__ init/main.i
    $ cat init/main.i | scripts/genksyms/genksyms -w
        [ snip ]
    ./include/linux/efi.h:1225: syntax error

The syntax error occurs in the following code in include/linux/efi.h:

    efi_status_t
    efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
                              u64 param_buffer_addr, void *context);

The issue arises from __efiapi, which is defined as either
__attribute__((ms_abi)) or __attribute__((regparm(0))).

This commit allows abstract_declarator to be prefixed with attributes.

To avoid conflicts, I tweaked the rule for decl_specifier_seq. Due to
this change, a standalone attribute cannot become decl_specifier_seq.
Otherwise, I do not know how to resolve the conflicts.

The following code, which was previously accepted by genksyms, will now
result in a syntax error:

    void my_func(__attribute__((unused))x);

I do not think it is a big deal because GCC also fails to parse it.

    $ echo 'void my_func(__attribute__((unused))x);' | gcc -c -x c -
    <stdin>:1:37: error: unknown type name 'x'

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index cd933a95548d6..54e16c2e0b4b0 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -203,8 +203,9 @@ decl_specifier_seq_opt:
 	;
 
 decl_specifier_seq:
-	decl_specifier				{ decl_spec = *$1; }
+	attribute_opt decl_specifier		{ decl_spec = *$2; }
 	| decl_specifier_seq decl_specifier	{ decl_spec = *$2; }
+	| decl_specifier_seq ATTRIBUTE_PHRASE	{ decl_spec = *$2; }
 	;
 
 decl_specifier:
@@ -216,7 +217,6 @@ decl_specifier:
 		}
 	| type_specifier	{ dont_want_type_specifier = true; $$ = $1; }
 	| type_qualifier
-	| ATTRIBUTE_PHRASE
 	;
 
 storage_class_specifier:
@@ -406,8 +406,8 @@ direct_abstract_declarator1:
 		{ $$ = $4; }
 	| direct_abstract_declarator1 BRACKET_PHRASE
 		{ $$ = $2; }
-	| open_paren abstract_declarator ')'
-		{ $$ = $3; }
+	| open_paren attribute_opt abstract_declarator ')'
+		{ $$ = $4; }
 	| open_paren error ')'
 		{ $$ = $3; }
 	| BRACKET_PHRASE

From a8b7d066f8626ec847d3e66aef1320968d1fe298 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:50 +0900
Subject: [PATCH 105/368] genksyms: fix syntax error for attribute before
 nested_declarator

A longstanding issue with genksyms is that it has hidden syntax errors.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

For example, with CONFIG_MODVERSIONS=y on v6.13-rc1:

    $ make -s KCFLAGS=-D__GENKSYMS__ drivers/acpi/prmt.i
    $ cat drivers/acpi/prmt.i | scripts/genksyms/genksyms -w
        [ snip ]
    drivers/acpi/prmt.c:56: syntax error

The syntax error occurs in the following code in drivers/acpi/prmt.c:

    struct prm_handler_info {
            [ snip ]
            efi_status_t (__efiapi *handler_addr)(u64, void *);
            [ snip ]
    };

The issue arises from __efiapi, which is defined as either
__attribute__((ms_abi)) or __attribute__((regparm(0))).

This commit allows nested_declarator to be prefixed with attributes.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 54e16c2e0b4b0..49d3e536b9a87 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -345,8 +345,8 @@ direct_nested_declarator1:
 		{ $$ = $4; }
 	| direct_nested_declarator1 BRACKET_PHRASE
 		{ $$ = $2; }
-	| '(' nested_declarator ')'
-		{ $$ = $3; }
+	| '(' attribute_opt nested_declarator ')'
+		{ $$ = $4; }
 	| '(' error ')'
 		{ $$ = $3; }
 	;

From 2ac068cb0b366c61e7aebaccf0240eae8b2c1b43 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:51 +0900
Subject: [PATCH 106/368] genksyms: fix syntax error for attribute after
 abstract_declarator

A longstanding issue with genksyms is that it has hidden syntax errors.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

For example, with CONFIG_MODVERSIONS=y on v6.13-rc1:

    $ make -s KCFLAGS=-D__GENKSYMS__ kernel/module/main.i
    $ cat kernel/module/main.i | scripts/genksyms/genksyms -w
        [ snip ]
    kernel/module/main.c:97: syntax error

The syntax error occurs in the following code in kernel/module/main.c:

    static void __mod_update_bounds(enum mod_mem_type type __maybe_unused, void *base,
                                    unsigned int size, struct mod_tree_root *tree)
    {
            [ snip ]
    }

The issue arises from __maybe_unused, which is defined as
__attribute__((__unused__)).

This commit allows direct_abstract_declarator to be followed by
attributes.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 49d3e536b9a87..82774df506421 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -383,8 +383,8 @@ abstract_declarator:
 	ptr_operator
 	| ptr_operator abstract_declarator
 		{ $$ = $2 ? $2 : $1; }
-	| direct_abstract_declarator
-		{ $$ = $1; dont_want_type_specifier = false; }
+	| direct_abstract_declarator attribute_opt
+		{ $$ = $2; dont_want_type_specifier = false; }
 	;
 
 direct_abstract_declarator:

From 82db1c29103ebf581484c0b30805e68726121dcb Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:52 +0900
Subject: [PATCH 107/368] genksyms: fix syntax error for attribute after
 'struct'

A longstanding issue with genksyms is that it has hidden syntax errors.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

For example, with CONFIG_MODVERSIONS=y on v6.13-rc1:

    $ make -s KCFLAGS=-D__GENKSYMS__ arch/x86/kernel/cpu/mshyperv.i
    $ cat arch/x86/kernel/cpu/mshyperv.i | scripts/genksyms/genksyms -w
        [ snip ]
    ./arch/x86/include/asm/svm.h:122: syntax error

The syntax error occurs in the following code in arch/x86/include/asm/svm.h:

    struct __attribute__ ((__packed__)) vmcb_control_area {
            [ snip ]
    };

The issue arises from __attribute__ immediately after the 'struct'
keyword.

This commit allows the 'struct' keyword to be followed by attributes.

The lexer must be adjusted because dont_want_brace_phrase should not be
decremented while processing attributes.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/lex.l   |  7 ++++++-
 scripts/genksyms/parse.y | 10 +++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
index e886133af5783..a1f969dcf24f1 100644
--- a/scripts/genksyms/lex.l
+++ b/scripts/genksyms/lex.l
@@ -438,7 +438,12 @@ fini:
 
   if (suppress_type_lookup > 0)
     --suppress_type_lookup;
-  if (dont_want_brace_phrase > 0)
+
+  /*
+   *  __attribute__() can be placed immediately after the 'struct' keyword.
+   *  e.g.) struct __attribute__((__packed__)) foo { ... };
+   */
+  if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0)
     --dont_want_brace_phrase;
 
   yylval = &next_node->next;
diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 82774df506421..33639232a709e 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -234,16 +234,16 @@ type_specifier:
 
 	/* References to s/u/e's defined elsewhere.  Rearrange things
 	   so that it is easier to expand the definition fully later.  */
-	| STRUCT_KEYW IDENT
-		{ remove_node($1); (*$2)->tag = SYM_STRUCT; $$ = $2; }
+	| STRUCT_KEYW attribute_opt IDENT
+		{ remove_node($1); (*$3)->tag = SYM_STRUCT; $$ = $3; }
 	| UNION_KEYW IDENT
 		{ remove_node($1); (*$2)->tag = SYM_UNION; $$ = $2; }
 	| ENUM_KEYW IDENT
 		{ remove_node($1); (*$2)->tag = SYM_ENUM; $$ = $2; }
 
 	/* Full definitions of an s/u/e.  Record it.  */
-	| STRUCT_KEYW IDENT class_body
-		{ record_compound($1, $2, $3, SYM_STRUCT); $$ = $3; }
+	| STRUCT_KEYW attribute_opt IDENT class_body
+		{ record_compound($1, $3, $4, SYM_STRUCT); $$ = $4; }
 	| UNION_KEYW IDENT class_body
 		{ record_compound($1, $2, $3, SYM_UNION); $$ = $3; }
 	| ENUM_KEYW IDENT enum_body
@@ -254,7 +254,7 @@ type_specifier:
 	| ENUM_KEYW enum_body
 		{ add_symbol(NULL, SYM_ENUM, NULL, 0); $$ = $2; }
 	/* Anonymous s/u definitions.  Nothing needs doing.  */
-	| STRUCT_KEYW class_body			{ $$ = $2; }
+	| STRUCT_KEYW attribute_opt class_body		{ $$ = $3; }
 	| UNION_KEYW class_body				{ $$ = $2; }
 	;
 

From 97bbf9e312c3fbaf0baa56120238825d2eb23b8a Mon Sep 17 00:00:00 2001
From: Denis Arefev <arefev@swemel.ru>
Date: Mon, 2 Dec 2024 12:36:52 +0300
Subject: [PATCH 108/368] ubi: Add a check for ubi_num

Add a check for negative values of ubi_num.
If the variable ubi_num takes a negative value, we get:

qemu-system-arm ... -append "ubi.mtd=0,0,0,-22222345" ...
[    0.745065]  ubi_attach_mtd_dev from ubi_init+0x178/0x218
[    0.745230]  ubi_init from do_one_initcall+0x70/0x1ac
[    0.745344]  do_one_initcall from kernel_init_freeable+0x198/0x224
[    0.745474]  kernel_init_freeable from kernel_init+0x18/0x134
[    0.745600]  kernel_init from ret_from_fork+0x14/0x28
[    0.745727] Exception stack(0x90015fb0 to 0x90015ff8)

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 83ff59a06663 ("UBI: support ubi_num on mtd.ubi command line")
Cc: stable@vger.kernel.org
Signed-off-by: Denis Arefev <arefev@swemel.ru>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 30be4ed68fad2..ef6a22f372f95 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1537,7 +1537,7 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp)
 	if (token) {
 		int err = kstrtoint(token, 10, &p->ubi_num);
 
-		if (err) {
+		if (err || p->ubi_num < UBI_DEV_NUM_AUTO) {
 			pr_err("UBI error: bad value for ubi_num parameter: %s\n",
 			       token);
 			return -EINVAL;

From 923d3583ead133da742b42d9debbb7d5c5a56587 Mon Sep 17 00:00:00 2001
From: Pintu Kumar <quic_pintu@quicinc.com>
Date: Mon, 9 Dec 2024 19:29:36 +0530
Subject: [PATCH 109/368] ubifs: dump_lpt_leb: remove return at end of void
 function

There is a useless return statement at the end of the void
function dump_lpt_leb().
Remove it.

Signed-off-by: Pintu Kumar <quic_pintu@quicinc.com>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/lpt_commit.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index aa8837e6247cf..f2cb214581fd9 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1932,7 +1932,6 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 	pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
 out:
 	vfree(buf);
-	return;
 }
 
 /**

From 404de7abc05758254ad57d9501d30427d9c57417 Mon Sep 17 00:00:00 2001
From: Pintu Kumar <quic_pintu@quicinc.com>
Date: Mon, 9 Dec 2024 21:51:04 +0530
Subject: [PATCH 110/368] ubifs: ubifs_dump_leb: remove return from end of void
 function

There is a useless return statement at the end of the void
function ubifs_dump_leb().
Remove it.

Signed-off-by: Pintu Kumar <quic_pintu@quicinc.com>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/debug.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 5cc69beaa62ec..987eb5b6782ab 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -863,7 +863,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 
 out:
 	vfree(buf);
-	return;
 }
 
 void ubifs_dump_znode(const struct ubifs_info *c,

From 844c6fdc13cf3d9d251533631988a58f8356a8c8 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Sat, 14 Dec 2024 19:01:53 +0800
Subject: [PATCH 111/368] ubi: Revert "ubi: wl: Close down wear-leveling before
 nand is suspended"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 5580cdae05ae ("ubi: wl: Close down wear-leveling before nand is
suspended") added a reboot notifier in the UBI layer to shut down the
wear-leveling subsystem, which introduced a UAF problem [1]. Besides that,
this approach also brings other potential UAF problems, for example:
       reboot             kworker
 ubi_wl_reboot_notifier
  ubi_wl_close
   ubi_fastmap_close
    kfree(ubi->fm)
                     update_fastmap_work_fn
		      ubi_update_fastmap
		       old_fm = ubi->fm
		       if (old_fm && old_fm->e[i]) // UAF!

Actually, the problem fixed by commit 5580cdae05ae ("ubi: wl: Close down
wear-leveling before nand is suspended") has been solved by commit
8cba323437a4 ("mtd: rawnand: protect access to rawnand devices while in
suspend"), which was discussed in [2]. So we can revert the commit
5580cdae05ae ("ubi: wl: Close down wear-leveling before nand is
suspended") directly.

[1] https://lore.kernel.org/linux-mtd/20241208175211.9406-2-dennis.lamerice@gmail.com/
[2] https://lore.kernel.org/all/9bf76f5d-12a4-46ff-90d4-4a7f0f47c381@axis.com/

Fixes: 5580cdae05ae ("ubi: wl: Close down wear-leveling before nand is suspended")
Reported-by: Dennis Lam <dennis.lamerice@gmail.com>
Closes: https://lore.kernel.org/linux-mtd/20241208175211.9406-2-dennis.lamerice@gmail.com/
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Acked-by: Mårten Lindahl <marten.lindahl@axis.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/ubi.h |  2 --
 drivers/mtd/ubi/wl.c  | 21 ---------------------
 2 files changed, 23 deletions(-)

diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 26cc53ad34ec7..c792b9bcab9bc 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -549,7 +549,6 @@ struct ubi_debug_info {
  * @peb_buf: a buffer of PEB size used for different purposes
  * @buf_mutex: protects @peb_buf
  * @ckvol_mutex: serializes static volume checking when opening
- * @wl_reboot_notifier: close all wear-leveling work before reboot
  *
  * @dbg: debugging information for this UBI device
  */
@@ -652,7 +651,6 @@ struct ubi_device {
 	void *peb_buf;
 	struct mutex buf_mutex;
 	struct mutex ckvol_mutex;
-	struct notifier_block wl_reboot_notifier;
 
 	struct ubi_debug_info dbg;
 };
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 4f6f339d8fb8a..fbd399cf65033 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -89,7 +89,6 @@
 #include <linux/crc32.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
-#include <linux/reboot.h>
 #include "ubi.h"
 #include "wl.h"
 
@@ -128,8 +127,6 @@ static int self_check_in_wl_tree(const struct ubi_device *ubi,
 				 struct ubi_wl_entry *e, struct rb_root *root);
 static int self_check_in_pq(const struct ubi_device *ubi,
 			    struct ubi_wl_entry *e);
-static int ubi_wl_reboot_notifier(struct notifier_block *n,
-				  unsigned long state, void *cmd);
 
 /**
  * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
@@ -1953,13 +1950,6 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 	if (!ubi->ro_mode && !ubi->fm_disabled)
 		ubi_ensure_anchor_pebs(ubi);
 #endif
-
-	if (!ubi->wl_reboot_notifier.notifier_call) {
-		ubi->wl_reboot_notifier.notifier_call = ubi_wl_reboot_notifier;
-		ubi->wl_reboot_notifier.priority = 1; /* Higher than MTD */
-		register_reboot_notifier(&ubi->wl_reboot_notifier);
-	}
-
 	return 0;
 
 out_free:
@@ -2005,17 +1995,6 @@ void ubi_wl_close(struct ubi_device *ubi)
 	kfree(ubi->lookuptbl);
 }
 
-static int ubi_wl_reboot_notifier(struct notifier_block *n,
-				  unsigned long state, void *cmd)
-{
-	struct ubi_device *ubi;
-
-	ubi = container_of(n, struct ubi_device, wl_reboot_notifier);
-	ubi_wl_close(ubi);
-
-	return NOTIFY_DONE;
-}
-
 /**
  * self_check_ec - make sure that the erase counter of a PEB is correct.
  * @ubi: UBI device description object

From bdb0ca39e0acccf6771db49c3f94ed787d05f2d7 Mon Sep 17 00:00:00 2001
From: pangliyuan <pangliyuan1@huawei.com>
Date: Tue, 24 Dec 2024 16:18:23 +0800
Subject: [PATCH 112/368] ubifs: skip dumping tnc tree when zroot is null

Clearing the slab cache frees all znodes in memory and sets
c->zroot.znode = NULL; dumping the TNC tree afterwards then accesses
c->zroot.znode, which causes a null pointer dereference.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=219624#c0
Fixes: 1e51764a3c2a ("UBIFS: add new flash file system")
Signed-off-by: pangliyuan <pangliyuan1@huawei.com>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/debug.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 987eb5b6782ab..b01f382ce8db0 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -945,16 +945,20 @@ void ubifs_dump_tnc(struct ubifs_info *c)
 
 	pr_err("\n");
 	pr_err("(pid %d) start dumping TNC tree\n", current->pid);
-	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
-	level = znode->level;
-	pr_err("== Level %d ==\n", level);
-	while (znode) {
-		if (level != znode->level) {
-			level = znode->level;
-			pr_err("== Level %d ==\n", level);
+	if (c->zroot.znode) {
+		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
+		level = znode->level;
+		pr_err("== Level %d ==\n", level);
+		while (znode) {
+			if (level != znode->level) {
+				level = znode->level;
+				pr_err("== Level %d ==\n", level);
+			}
+			ubifs_dump_znode(c, znode);
+			znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
 		}
-		ubifs_dump_znode(c, znode);
-		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
+	} else {
+		pr_err("empty TNC tree in memory\n");
 	}
 	pr_err("(pid %d) finish dumping TNC tree\n", current->pid);
 }

From 3156ceb222414456084d964f43ada071206039b8 Mon Sep 17 00:00:00 2001
From: Rickard Andersson <rickard.andersson@axis.com>
Date: Mon, 16 Dec 2024 09:54:19 +0100
Subject: [PATCH 113/368] ubi: Expose interface for detailed erase counters

Using the ioctl command 'UBI_IOCECNFO' user space can obtain
detailed erase counter information of all blocks of a device.

Signed-off-by: Rickard Andersson <rickard.andersson@axis.com>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/uapi/mtd/ubi-user.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h
index e1571603175e7..aa872a41ffb9b 100644
--- a/include/uapi/mtd/ubi-user.h
+++ b/include/uapi/mtd/ubi-user.h
@@ -175,6 +175,8 @@
 #define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32)
 /* Force scrubbing on the specified PEB */
 #define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32)
+/* Read detailed device erase counter information */
+#define UBI_IOCECNFO _IOWR(UBI_IOC_MAGIC, 6, struct ubi_ecinfo_req)
 
 /* ioctl commands of the UBI control character device */
 
@@ -412,6 +414,37 @@ struct ubi_rnvol_req {
 	} ents[UBI_MAX_RNVOL];
 } __packed;
 
+/**
+ * struct ubi_ecinfo_req - a data structure used for requesting and receiving
+ * erase block counter information from a UBI device.
+ *
+ * @start: index of first physical erase block to read (in)
+ * @length: number of erase counters to read (in)
+ * @read_length: number of erase counters that were actually read (out)
+ * @padding: reserved for future, not used, has to be zeroed
+ * @erase_counters: array of erase counter values (out)
+ *
+ * This structure is used to retrieve erase counter information for a specified
+ * range of PEBs on a UBI device.
+ * The ioctl reads erase counters starting at PEB @start and attempts to read
+ * @length of them.
+ * The retrieved values are stored in the @erase_counters array. It is the
+ * responsibility of the caller to allocate enough memory for storing @length
+ * elements in the @erase_counters array.
+ * If a block is bad or if the erase counter is unknown the corresponding value
+ * in the array will be set to -1.
+ * The @read_length field will indicate the number of erase counters actually
+ * read. Typically @read_length will be limited due to memory or the number of
+ * PEBs on the UBI device.
+ */
+struct ubi_ecinfo_req {
+	__s32 start;
+	__s32 length;
+	__s32 read_length;
+	__s8  padding[16];
+	__s32 erase_counters[];
+}  __packed;
+
 /**
  * struct ubi_leb_change_req - a data structure used in atomic LEB change
  *                             requests.

From 01099f635a4c68b8574d350a972ba062dd5142e9 Mon Sep 17 00:00:00 2001
From: Rickard Andersson <rickard.andersson@axis.com>
Date: Mon, 16 Dec 2024 09:54:20 +0100
Subject: [PATCH 114/368] ubi: Implement ioctl for detailed erase counters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, "max_ec" can be read from sysfs, which provides a limited
view of the flash device’s wear. In certain cases, such as bugs in
the wear-leveling algorithm, specific blocks can be worn down more
than others, resulting in uneven wear distribution. Also some use cases
can wear the erase blocks of the fastmap area more heavily than other
parts of flash.
Providing detailed erase counter values gives a better understanding of
the overall flash wear and is needed to be able to calculate for example
expected life time.
There exists more detailed info in debugfs, but this information is
only available for debug builds.

Signed-off-by: Rickard Andersson <rickard.andersson@axis.com>
Tested-by: Zhihao Cheng <chengzhihao1@huawei.com>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/cdev.c | 69 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 6bb80d7714bc8..4c3e4edb68532 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -828,6 +828,69 @@ static int rename_volumes(struct ubi_device *ubi,
 	return err;
 }
 
+static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user *ureq)
+{
+	struct ubi_ecinfo_req req;
+	struct ubi_wl_entry *wl;
+	int read_cnt;
+	int peb;
+	int end_peb;
+
+	/* Copy the input arguments */
+	if (copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req)))
+		return -EFAULT;
+
+	/* Check input arguments */
+	if (req.length <= 0 || req.start < 0 || req.start >= ubi->peb_count)
+		return -EINVAL;
+
+	if (check_add_overflow(req.start, req.length, &end_peb))
+		return -EINVAL;
+
+	if (end_peb > ubi->peb_count)
+		end_peb = ubi->peb_count;
+
+	/* Check access rights before filling erase_counters array */
+	if (!access_ok(ureq->erase_counters, (end_peb-req.start) * sizeof(int32_t)))
+		return -EFAULT;
+
+	/* Fill erase counter array */
+	read_cnt = 0;
+	for (peb = req.start; peb < end_peb; read_cnt++, peb++) {
+		int ec;
+
+		if (ubi_io_is_bad(ubi, peb)) {
+			if (__put_user(UBI_UNKNOWN, ureq->erase_counters+read_cnt))
+				return -EFAULT;
+
+			continue;
+		}
+
+		spin_lock(&ubi->wl_lock);
+
+		wl = ubi->lookuptbl[peb];
+		if (wl)
+			ec = wl->ec;
+		else
+			ec = UBI_UNKNOWN;
+
+		spin_unlock(&ubi->wl_lock);
+
+		if (__put_user(ec, ureq->erase_counters+read_cnt))
+			return -EFAULT;
+
+	}
+
+	/* Return actual read length */
+	req.read_length = read_cnt;
+
+	/* Copy everything except erase counter array */
+	if (copy_to_user(ureq, &req, sizeof(struct ubi_ecinfo_req)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
 			   unsigned long arg)
 {
@@ -991,6 +1054,12 @@ static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 
+	case UBI_IOCECNFO:
+	{
+		err = ubi_get_ec_info(ubi, argp);
+		break;
+	}
+
 	default:
 		err = -ENOTTY;
 		break;

From 9d87cf525fd2e1a5fcbbb40ee3df216d1d266c88 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Wed, 15 Jan 2025 10:02:51 -0800
Subject: [PATCH 115/368] RISC-V: Mark riscv_v_init() as __init

This trips up with Xtheadvector enabled, but as far as I can tell it's
just been an issue since the original patchset.

Fixes: 7ca7a7b9b635 ("riscv: Add sysctl to set the default vector rule for new processes")
Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20250115180251.31444-1-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 821818886fab0..39f0577f580de 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -309,7 +309,7 @@ static int __init riscv_v_sysctl_init(void)
 static int __init riscv_v_sysctl_init(void) { return 0; }
 #endif /* ! CONFIG_SYSCTL */
 
-static int riscv_v_init(void)
+static int __init riscv_v_init(void)
 {
 	return riscv_v_sysctl_init();
 }

From e576b7cb818343e2dc740185fbea6af580763dde Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:07 -0800
Subject: [PATCH 116/368] dt-bindings: riscv: Add xtheadvector ISA extension
 description

The xtheadvector ISA extension is described on the T-Head extension spec
Github page [1] at commit 95358cb2cca9.

Link: https://github.com/T-head-Semi/thead-extension-spec/blob/95358cb2cca9489361c61d335e03d3134b14133f/xtheadvector.adoc [1]

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-1-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../devicetree/bindings/riscv/extensions.yaml          | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
index af7e5237b2c03..b49278e2f2aa3 100644
--- a/Documentation/devicetree/bindings/riscv/extensions.yaml
+++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
@@ -593,6 +593,10 @@ properties:
             latency, as ratified in commit 56ed795 ("Update
             riscv-crypto-spec-vector.adoc") of riscv-crypto.
 
+        # vendor extensions, each extension sorted alphanumerically under the
+        # vendor they belong to. Vendors are sorted alphanumerically as well.
+
+        # Andes
         - const: xandespmu
           description:
             The Andes Technology performance monitor extension for counter overflow
@@ -600,6 +604,12 @@ properties:
             Registers in the AX45MP datasheet.
             https://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
 
+        # T-HEAD
+        - const: xtheadvector
+          description:
+            The T-HEAD specific 0.7.1 vector implementation as written in
+            https://github.com/T-head-Semi/thead-extension-spec/blob/95358cb2cca9489361c61d335e03d3134b14133f/xtheadvector.adoc.
+
     allOf:
       # Zcb depends on Zca
       - if:

From bf6279b38a4bbdb2954c3d159523d41367763a48 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:08 -0800
Subject: [PATCH 117/368] dt-bindings: cpus: add a thead vlen register length
 property

Add a property analogous to the vlenb CSR so that software can detect
the vector length of each CPU prior to it being brought online.
Currently software has to assume that the vector length read from the
boot CPU applies to all possible CPUs. On T-Head CPUs implementing
pre-ratification vector, reading the th.vlenb CSR may produce an illegal
instruction trap, so this property is required on such systems.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-2-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../devicetree/bindings/riscv/cpus.yaml       | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index 8edc8261241ad..c0cf6cf56749d 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -26,6 +26,18 @@ description: |
 allOf:
   - $ref: /schemas/cpu.yaml#
   - $ref: extensions.yaml
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - thead,c906
+                - thead,c910
+                - thead,c920
+    then:
+      properties:
+        thead,vlenb: false
 
 properties:
   compatible:
@@ -95,6 +107,13 @@ properties:
     description:
       The blocksize in bytes for the Zicboz cache operations.
 
+  thead,vlenb:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      VLEN/8, the vector register length in bytes. This property is required on
+      thead systems where the vector register length is not identical on all harts, or
+      the vlenb CSR is not available.
+
   # RISC-V has multiple properties for cache op block sizes as the sizes
   # differ between individual CBO extensions
   cache-op-block-size: false

From ce1daeeba600a79b776864f12d19e799f1eb124f Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:09 -0800
Subject: [PATCH 118/368] riscv: dts: allwinner: Add xtheadvector to the D1/D1s
 devicetree

The D1/D1s SoCs support xtheadvector so it can be included in the
devicetree. Also include vlenb for the cpu.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-3-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi
index 64c3c2e6cbe02..6367112e614a1 100644
--- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi
+++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi
@@ -27,7 +27,8 @@
 			riscv,isa = "rv64imafdc";
 			riscv,isa-base = "rv64i";
 			riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr",
-					       "zifencei", "zihpm";
+					       "zifencei", "zihpm", "xtheadvector";
+			thead,vlenb = <128>;
 			#cooling-cells = <2>;
 
 			cpu0_intc: interrupt-controller {

From cddd63869f9214f2bc5c4b89a8ea1bd0ff4d89c5 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:10 -0800
Subject: [PATCH 119/368] riscv: Add thead and xtheadvector as a vendor
 extension

Add support to the kernel for THead vendor extensions with the target of
the new extension xtheadvector.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-4-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig.vendor                      | 13 +++++++++++++
 .../include/asm/vendor_extensions/thead.h      | 16 ++++++++++++++++
 arch/riscv/kernel/cpufeature.c                 |  1 +
 arch/riscv/kernel/vendor_extensions.c          | 10 ++++++++++
 arch/riscv/kernel/vendor_extensions/Makefile   |  1 +
 arch/riscv/kernel/vendor_extensions/thead.c    | 18 ++++++++++++++++++
 6 files changed, 59 insertions(+)
 create mode 100644 arch/riscv/include/asm/vendor_extensions/thead.h
 create mode 100644 arch/riscv/kernel/vendor_extensions/thead.c

diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
index 6f1cdd32ed29a..9897442bd44ff 100644
--- a/arch/riscv/Kconfig.vendor
+++ b/arch/riscv/Kconfig.vendor
@@ -16,4 +16,17 @@ config RISCV_ISA_VENDOR_EXT_ANDES
 	  If you don't know what to do here, say Y.
 endmenu
 
+menu "T-Head"
+config RISCV_ISA_VENDOR_EXT_THEAD
+	bool "T-Head vendor extension support"
+	select RISCV_ISA_VENDOR_EXT
+	default y
+	help
+	  Say N here to disable detection of and support for all T-Head vendor
+	  extensions. Without this option enabled, T-Head vendor extensions will
+	  not be detected at boot and their presence not reported to userspace.
+
+	  If you don't know what to do here, say Y.
+endmenu
+
 endmenu
diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h
new file mode 100644
index 0000000000000..48421d1553ada
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/thead.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H
+
+#include <asm/vendor_extensions.h>
+
+#include <linux/types.h>
+
+/*
+ * Extension keys must be strictly less than RISCV_ISA_VENDOR_EXT_MAX.
+ */
+#define RISCV_ISA_VENDOR_EXT_XTHEADVECTOR		0
+
+extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead;
+
+#endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index eb904ca64ad03..d752291d829bb 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -25,6 +25,7 @@
 #include <asm/sbi.h>
 #include <asm/vector.h>
 #include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/thead.h>
 
 #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
 
diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c
index a8126d1183412..a31ff84740ebc 100644
--- a/arch/riscv/kernel/vendor_extensions.c
+++ b/arch/riscv/kernel/vendor_extensions.c
@@ -6,6 +6,7 @@
 #include <asm/vendorid_list.h>
 #include <asm/vendor_extensions.h>
 #include <asm/vendor_extensions/andes.h>
+#include <asm/vendor_extensions/thead.h>
 
 #include <linux/array_size.h>
 #include <linux/types.h>
@@ -14,6 +15,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = {
 #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
 	&riscv_isa_vendor_ext_list_andes,
 #endif
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
+	&riscv_isa_vendor_ext_list_thead,
+#endif
 };
 
 const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list);
@@ -41,6 +45,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig
 		cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap;
 		break;
 	#endif
+	#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
+	case THEAD_VENDOR_ID:
+		bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap;
+		cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap;
+		break;
+	#endif
 	default:
 		return false;
 	}
diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile
index 6a61aed944f17..353522cb3bf09 100644
--- a/arch/riscv/kernel/vendor_extensions/Makefile
+++ b/arch/riscv/kernel/vendor_extensions/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES)	+= andes.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD)	+= thead.o
diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c
new file mode 100644
index 0000000000000..0f27baf8d2458
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/thead.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/thead.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/* All T-Head vendor extensions supported in Linux */
+static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = {
+	__RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = {
+	.ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead),
+	.ext_data = riscv_isa_vendor_ext_thead,
+};

From 377be47f90e411c10440650864d72d2ecb639bd7 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:11 -0800
Subject: [PATCH 120/368] riscv: vector: Use vlenb from DT for thead

If thead,vlenb is provided in the device tree, prefer that over reading
the vlenb csr.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-5-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig.vendor                     | 13 +++++
 arch/riscv/include/asm/cpufeature.h           |  2 +
 .../include/asm/vendor_extensions/thead.h     |  6 +++
 arch/riscv/kernel/cpufeature.c                | 48 +++++++++++++++++++
 arch/riscv/kernel/vector.c                    | 12 ++++-
 arch/riscv/kernel/vendor_extensions/thead.c   | 11 +++++
 6 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
index 9897442bd44ff..b096548fe0ffd 100644
--- a/arch/riscv/Kconfig.vendor
+++ b/arch/riscv/Kconfig.vendor
@@ -26,6 +26,19 @@ config RISCV_ISA_VENDOR_EXT_THEAD
 	  extensions. Without this option enabled, T-Head vendor extensions will
 	  not be detected at boot and their presence not reported to userspace.
 
+	  If you don't know what to do here, say Y.
+
+config RISCV_ISA_XTHEADVECTOR
+	bool "xtheadvector extension support"
+	depends on RISCV_ISA_VENDOR_EXT_THEAD
+	depends on RISCV_ISA_V
+	depends on FPU
+	default y
+	help
+	  Say N here if you want to disable all xtheadvector related procedures
+	  in the kernel. This will disable vector for any T-Head board that
+	  contains xtheadvector rather than the standard vector.
+
 	  If you don't know what to do here, say Y.
 endmenu
 
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 4bd054c54c21a..569140d6e6399 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -34,6 +34,8 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
 
+extern u32 thead_vlenb_of;
+
 void __init riscv_user_isa_enable(void);
 
 #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) {	\
diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h
index 48421d1553ada..93fcbf46c87e7 100644
--- a/arch/riscv/include/asm/vendor_extensions/thead.h
+++ b/arch/riscv/include/asm/vendor_extensions/thead.h
@@ -13,4 +13,10 @@
 
 extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead;
 
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
+void disable_xtheadvector(void);
+#else
+static inline void disable_xtheadvector(void) { }
+#endif
+
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index d752291d829bb..7d9e8bbfaef28 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -39,6 +39,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 /* Per-cpu ISA extensions. */
 struct riscv_isainfo hart_isa[NR_CPUS];
 
+u32 thead_vlenb_of;
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -779,6 +781,46 @@ static void __init riscv_fill_vendor_ext_list(int cpu)
 	}
 }
 
+static int has_thead_homogeneous_vlenb(void)
+{
+	int cpu;
+	u32 prev_vlenb = 0;
+	u32 vlenb = 0;
+
+	/* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		struct device_node *cpu_node;
+
+		cpu_node = of_cpu_device_node_get(cpu);
+		if (!cpu_node) {
+			pr_warn("Unable to find cpu node\n");
+			return -ENOENT;
+		}
+
+		if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) {
+			of_node_put(cpu_node);
+
+			if (prev_vlenb)
+				return -ENOENT;
+			continue;
+		}
+
+		if (prev_vlenb && vlenb != prev_vlenb) {
+			of_node_put(cpu_node);
+			return -ENOENT;
+		}
+
+		prev_vlenb = vlenb;
+		of_node_put(cpu_node);
+	}
+	/* vlenb is still 0 here if no CPU node provided thead,vlenb */
+	thead_vlenb_of = vlenb;
+	return 0;
+}
+
 static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
 {
 	unsigned int cpu;
@@ -832,6 +874,12 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
 		riscv_fill_vendor_ext_list(cpu);
 	}
 
+	if (riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR) &&
+	    has_thead_homogeneous_vlenb() < 0) {
+		pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
+		disable_xtheadvector();
+	}
+
 	if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
 		return -ENOENT;
 
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 39f0577f580de..6ed16a5f3e87f 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void)
 {
 	unsigned long this_vsize;
 
-	/* There are 32 vector registers with vlenb length. */
+	/*
+	 * There are 32 vector registers with vlenb length.
+	 *
+	 * If the thead,vlenb property was provided by the firmware, use that
+	 * instead of probing the CSRs.
+	 */
+	if (thead_vlenb_of) {
+		riscv_v_vsize = thead_vlenb_of * 32;
+		return 0;
+	}
+
 	riscv_v_enable();
 	this_vsize = csr_read(CSR_VLENB) * 32;
 	riscv_v_disable();
diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c
index 0f27baf8d2458..519dbf70710af 100644
--- a/arch/riscv/kernel/vendor_extensions/thead.c
+++ b/arch/riscv/kernel/vendor_extensions/thead.c
@@ -5,6 +5,7 @@
 #include <asm/vendor_extensions/thead.h>
 
 #include <linux/array_size.h>
+#include <linux/cpumask.h>
 #include <linux/types.h>
 
 /* All T-Head vendor extensions supported in Linux */
@@ -16,3 +17,13 @@ struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = {
 	.ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead),
 	.ext_data = riscv_isa_vendor_ext_thead,
 };
+
+void disable_xtheadvector(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa);
+
+	clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa);
+}

From 66f197785d515d3fe5257ed65e189e4ee0b9b4e3 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 13 Nov 2024 18:21:12 -0800
Subject: [PATCH 121/368] RISC-V: define the elements of the VCSR vector CSR

The VCSR CSR contains two elements VXRM[2:1] and VXSAT[0].

Define constants for those to access the elements in a readable way.

Acked-by: Guo Ren <guoren@kernel.org>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-6-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/csr.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index fe5d4eb9adea1..db1d26dfaef90 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -314,6 +314,10 @@
 #define CSR_STIMECMP		0x14D
 #define CSR_STIMECMPH		0x15D
 
+#define VCSR_VXRM_MASK			3
+#define VCSR_VXRM_SHIFT			1
+#define VCSR_VXSAT_MASK			1
+
 /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */
 #define CSR_SISELECT		0x150
 #define CSR_SIREG		0x151

From b9a9314424512e536db5e54ff554c2f10759c657 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:13 -0800
Subject: [PATCH 122/368] riscv: csr: Add CSR encodings for CSR_VXRM/CSR_VXSAT

The VXRM vector csr for xtheadvector has an encoding of 0xa and VXSAT
has an encoding of 0x9.

Co-developed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-7-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/csr.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index db1d26dfaef90..2155f5afffd63 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -314,9 +314,14 @@
 #define CSR_STIMECMP		0x14D
 #define CSR_STIMECMPH		0x15D
 
-#define VCSR_VXRM_MASK			3
-#define VCSR_VXRM_SHIFT			1
-#define VCSR_VXSAT_MASK			1
+/* xtheadvector symbolic CSR names */
+#define CSR_VXSAT		0x9
+#define CSR_VXRM		0xa
+
+/* xtheadvector CSR masks */
+#define CSR_VXRM_MASK		3
+#define CSR_VXRM_SHIFT		1
+#define CSR_VXSAT_MASK		1
 
 /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */
 #define CSR_SISELECT		0x150

From 01e3313e34d0e3912a7031c217367df051603149 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:14 -0800
Subject: [PATCH 123/368] riscv: Add xtheadvector instruction definitions

xtheadvector uses different encodings than standard vector for
vsetvli and vector loads/stores. Write the instruction formats to be
used in assembly code.

Co-developed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-8-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../include/asm/vendor_extensions/thead.h     | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h
index 93fcbf46c87e7..e85c75b3b3408 100644
--- a/arch/riscv/include/asm/vendor_extensions/thead.h
+++ b/arch/riscv/include/asm/vendor_extensions/thead.h
@@ -19,4 +19,29 @@ void disable_xtheadvector(void);
 static inline void disable_xtheadvector(void) { }
 #endif
 
+/* Extension specific helpers */
+
+/*
+ * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
+ * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
+ * vsetvli	t4, x0, e8, m8, d1
+ */
+#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"
+
+/*
+ * While in theory the vector-0.7.1 vsb.v and vlb.v result in the same
+ * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
+ * the call, resulting in a different encoding that uses a value for the
+ * "mop" field that is not part of vector-0.7.1.
+ * So encode specific variants for vstate_save and _restore.
+ */
+#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"
+#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
+#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
+#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
+#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"
+#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
+#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
+#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"
+
 #endif

From d863910eabaffc68eb28aaf476dd870fc3f7197d Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:15 -0800
Subject: [PATCH 124/368] riscv: vector: Support xtheadvector save/restore

Use alternatives to add support for xtheadvector vector save/restore
routines.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-9-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/csr.h           |   6 +
 arch/riscv/include/asm/switch_to.h     |   2 +-
 arch/riscv/include/asm/vector.h        | 222 +++++++++++++++++++------
 arch/riscv/kernel/cpufeature.c         |   6 +-
 arch/riscv/kernel/kernel_mode_vector.c |   8 +-
 arch/riscv/kernel/process.c            |   4 +-
 arch/riscv/kernel/signal.c             |   6 +-
 arch/riscv/kernel/vector.c             |  12 +-
 8 files changed, 198 insertions(+), 68 deletions(-)

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 2155f5afffd63..ee23d0366cb2e 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -30,6 +30,12 @@
 #define SR_VS_CLEAN	_AC(0x00000400, UL)
 #define SR_VS_DIRTY	_AC(0x00000600, UL)
 
+#define SR_VS_THEAD		_AC(0x01800000, UL) /* xtheadvector Status */
+#define SR_VS_OFF_THEAD		_AC(0x00000000, UL)
+#define SR_VS_INITIAL_THEAD	_AC(0x00800000, UL)
+#define SR_VS_CLEAN_THEAD	_AC(0x01000000, UL)
+#define SR_VS_DIRTY_THEAD	_AC(0x01800000, UL)
+
 #define SR_XS		_AC(0x00018000, UL) /* Extension Status */
 #define SR_XS_OFF	_AC(0x00000000, UL)
 #define SR_XS_INITIAL	_AC(0x00008000, UL)
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 94e33216b2d94..0e71eb82f920c 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -117,7 +117,7 @@ do {							\
 	__set_prev_cpu(__prev->thread);			\
 	if (has_fpu())					\
 		__switch_to_fpu(__prev, __next);	\
-	if (has_vector())					\
+	if (has_vector() || has_xtheadvector())		\
 		__switch_to_vector(__prev, __next);	\
 	if (switch_to_should_flush_icache(__next))	\
 		local_flush_icache_all();		\
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index c7c023afbacd7..e8a83f55be2ba 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -18,6 +18,27 @@
 #include <asm/cpufeature.h>
 #include <asm/csr.h>
 #include <asm/asm.h>
+#include <asm/vendorid_list.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/thead.h>
+
+#define __riscv_v_vstate_or(_val, TYPE) ({				\
+	typeof(_val) _res = _val;					\
+	if (has_xtheadvector()) \
+		_res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD;	\
+	else								\
+		_res = (_res & ~SR_VS) | SR_VS_##TYPE;			\
+	_res;								\
+})
+
+#define __riscv_v_vstate_check(_val, TYPE) ({				\
+	bool _res;							\
+	if (has_xtheadvector()) \
+		_res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD;	\
+	else								\
+		_res = ((_val) & SR_VS) == SR_VS_##TYPE;		\
+	_res;								\
+})
 
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
@@ -41,39 +62,62 @@ static __always_inline bool has_vector(void)
 	return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X);
 }
 
+static __always_inline bool has_xtheadvector_no_alternatives(void)
+{
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
+		return false;
+
+	return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR);
+}
+
+static __always_inline bool has_xtheadvector(void)
+{
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
+		return false;
+
+	return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID,
+						   RISCV_ISA_VENDOR_EXT_XTHEADVECTOR);
+}
+
 static inline void __riscv_v_vstate_clean(struct pt_regs *regs)
 {
-	regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN;
+	regs->status = __riscv_v_vstate_or(regs->status, CLEAN);
 }
 
 static inline void __riscv_v_vstate_dirty(struct pt_regs *regs)
 {
-	regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY;
+	regs->status = __riscv_v_vstate_or(regs->status, DIRTY);
 }
 
 static inline void riscv_v_vstate_off(struct pt_regs *regs)
 {
-	regs->status = (regs->status & ~SR_VS) | SR_VS_OFF;
+	regs->status = __riscv_v_vstate_or(regs->status, OFF);
 }
 
 static inline void riscv_v_vstate_on(struct pt_regs *regs)
 {
-	regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL;
+	regs->status = __riscv_v_vstate_or(regs->status, INITIAL);
 }
 
 static inline bool riscv_v_vstate_query(struct pt_regs *regs)
 {
-	return (regs->status & SR_VS) != 0;
+	return !__riscv_v_vstate_check(regs->status, OFF);
 }
 
 static __always_inline void riscv_v_enable(void)
 {
-	csr_set(CSR_SSTATUS, SR_VS);
+	if (has_xtheadvector())
+		csr_set(CSR_SSTATUS, SR_VS_THEAD);
+	else
+		csr_set(CSR_SSTATUS, SR_VS);
 }
 
 static __always_inline void riscv_v_disable(void)
 {
-	csr_clear(CSR_SSTATUS, SR_VS);
+	if (has_xtheadvector())
+		csr_clear(CSR_SSTATUS, SR_VS_THEAD);
+	else
+		csr_clear(CSR_SSTATUS, SR_VS);
 }
 
 static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
@@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
 		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
 		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
 		"csrr	%2, " __stringify(CSR_VL) "\n\t"
-		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
-		"csrr	%4, " __stringify(CSR_VLENB) "\n\t"
 		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
-		  "=r" (dest->vcsr), "=r" (dest->vlenb) : :);
+		"=r" (dest->vcsr) : :);
+
+	if (has_xtheadvector()) {
+		unsigned long status;
+
+		/*
+		 * CSR_VCSR is defined as
+		 * [2:1] - vxrm[1:0]
+		 * [0] - vxsat
+		 * The earlier vector spec implemented by T-Head uses separate
+		 * registers for the same bit-elements, so just combine those
+		 * into the existing output field.
+		 *
+		 * Additionally T-Head cores need FS to be enabled when accessing
+		 * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
+		 * Though the cores do not implement the VXRM and VXSAT fields in the
+		 * FCSR CSR that vector-0.7.1 specifies.
+		 */
+		status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);
+		dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT;
+
+		dest->vlenb = riscv_v_vsize / 32;
+
+		if ((status & SR_FS) != SR_FS_DIRTY)
+			csr_write(CSR_STATUS, status);
+	} else {
+		dest->vcsr = csr_read(CSR_VCSR);
+		dest->vlenb = csr_read(CSR_VLENB);
+	}
 }
 
 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
@@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src
 		"vsetvl	 x0, %2, %1\n\t"
 		".option pop\n\t"
 		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
-		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
-		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
-		    "r" (src->vcsr) :);
+		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl));
+
+	if (has_xtheadvector()) {
+		unsigned long status;
+
+		/*
+		 * As in __vstate_csr_save above: vcsr is split across the
+		 * separate VXRM and VXSAT CSRs, accessed with FS forced dirty.
+		 */
+		status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);
+
+		csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK);
+		csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK);
+
+		if ((status & SR_FS) != SR_FS_DIRTY)
+			csr_write(CSR_STATUS, status);
+	} else {
+		csr_write(CSR_VCSR, src->vcsr);
+	}
 }
 
 static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
@@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
 
 	riscv_v_enable();
 	__vstate_csr_save(save_to);
-	asm volatile (
-		".option push\n\t"
-		".option arch, +zve32x\n\t"
-		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
-		"vse8.v		v0, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v8, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v16, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v24, (%1)\n\t"
-		".option pop\n\t"
-		: "=&r" (vl) : "r" (datap) : "memory");
+	if (has_xtheadvector()) {
+		asm volatile (
+			"mv t0, %0\n\t"			/* THEAD_VSB_* address via t0 */
+			THEAD_VSETVLI_T4X0E8M8D1	/* t4 = vl for e8, m8 */
+			THEAD_VSB_V_V0T0		/* v0-v7 */
+			"add		t0, t0, t4\n\t"
+			THEAD_VSB_V_V8T0		/* v8-v15 */
+			"add		t0, t0, t4\n\t"
+			THEAD_VSB_V_V16T0		/* v16-v23 */
+			"add		t0, t0, t4\n\t"
+			THEAD_VSB_V_V24T0		/* v24-v31 */
+			: : "r" (datap) : "memory", "t0", "t4");
+	} else {
+		asm volatile (
+			".option push\n\t"
+			".option arch, +zve32x\n\t"
+			"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
+			"vse8.v		v0, (%1)\n\t"
+			"add		%1, %1, %0\n\t"
+			"vse8.v		v8, (%1)\n\t"
+			"add		%1, %1, %0\n\t"
+			"vse8.v		v16, (%1)\n\t"
+			"add		%1, %1, %0\n\t"
+			"vse8.v		v24, (%1)\n\t"
+			".option pop\n\t"
+			: "=&r" (vl) : "r" (datap) : "memory");
+	}
 	riscv_v_disable();
 }
 
@@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
 	unsigned long vl;
 
 	riscv_v_enable();
-	asm volatile (
-		".option push\n\t"
-		".option arch, +zve32x\n\t"
-		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
-		"vle8.v		v0, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vle8.v		v8, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vle8.v		v16, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vle8.v		v24, (%1)\n\t"
-		".option pop\n\t"
-		: "=&r" (vl) : "r" (datap) : "memory");
+	if (has_xtheadvector()) {
+		asm volatile (
+			"mv t0, %0\n\t"			/* THEAD_VLB_* address via t0 */
+			THEAD_VSETVLI_T4X0E8M8D1	/* t4 = vl for e8, m8 */
+			THEAD_VLB_V_V0T0		/* v0-v7 */
+			"add		t0, t0, t4\n\t"
+			THEAD_VLB_V_V8T0		/* v8-v15 */
+			"add		t0, t0, t4\n\t"
+			THEAD_VLB_V_V16T0		/* v16-v23 */
+			"add		t0, t0, t4\n\t"
+			THEAD_VLB_V_V24T0		/* v24-v31 */
+			: : "r" (datap) : "memory", "t0", "t4");
+	} else {
+		asm volatile (
+			".option push\n\t"
+			".option arch, +zve32x\n\t"
+			"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
+			"vle8.v		v0, (%1)\n\t"
+			"add		%1, %1, %0\n\t"
+			"vle8.v		v8, (%1)\n\t"
+			"add		%1, %1, %0\n\t"
+			"vle8.v		v16, (%1)\n\t"
+			"add		%1, %1, %0\n\t"
+			"vle8.v		v24, (%1)\n\t"
+			".option pop\n\t"
+			: "=&r" (vl) : "r" (datap) : "memory");
+	}
 	__vstate_csr_restore(restore_from);
 	riscv_v_disable();
 }
@@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void)
 	unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1);
 
 	riscv_v_enable();
+	if (has_xtheadvector())
+		asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4");
+	else
+		asm volatile (
+			".option push\n\t"
+			".option arch, +zve32x\n\t"
+			"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
+			".option pop\n\t": "=&r" (vl));
+
 	asm volatile (
 		".option push\n\t"
 		".option arch, +zve32x\n\t"
-		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
 		"vmv.v.i	v0, -1\n\t"
 		"vmv.v.i	v8, -1\n\t"
 		"vmv.v.i	v16, -1\n\t"
 		"vmv.v.i	v24, -1\n\t"
 		"vsetvl		%0, x0, %1\n\t"
 		".option pop\n\t"
-		: "=&r" (vl) : "r" (vtype_inval) : "memory");
+		: "=&r" (vl) : "r" (vtype_inval));
+
 	riscv_v_disable();
 }
 
 static inline void riscv_v_vstate_discard(struct pt_regs *regs)
 {
-	if ((regs->status & SR_VS) == SR_VS_OFF)
-		return;
-
-	__riscv_v_vstate_discard();
-	__riscv_v_vstate_dirty(regs);
+	if (riscv_v_vstate_query(regs)) {
+		__riscv_v_vstate_discard();
+		__riscv_v_vstate_dirty(regs);
+	}
 }
 
 static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
 				       struct pt_regs *regs)
 {
-	if ((regs->status & SR_VS) == SR_VS_DIRTY) {
+	if (__riscv_v_vstate_check(regs->status, DIRTY)) {
 		__riscv_v_vstate_save(vstate, vstate->datap);
 		__riscv_v_vstate_clean(regs);
 	}
@@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
 static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
 					  struct pt_regs *regs)
 {
-	if ((regs->status & SR_VS) != SR_VS_OFF) {
+	if (riscv_v_vstate_query(regs)) {
 		__riscv_v_vstate_restore(vstate, vstate->datap);
 		__riscv_v_vstate_clean(regs);
 	}
@@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
 static inline void riscv_v_vstate_set_restore(struct task_struct *task,
 					      struct pt_regs *regs)
 {
-	if ((regs->status & SR_VS) != SR_VS_OFF) {
+	if (riscv_v_vstate_query(regs)) {
 		set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
 		riscv_v_vstate_on(regs);
 	}
@@ -270,6 +392,8 @@ struct pt_regs;
 static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
 static __always_inline bool has_vector(void) { return false; }
 static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
+static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; }
+static __always_inline bool has_xtheadvector(void) { return false; }
 static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 7d9e8bbfaef28..ba6976132638c 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -874,8 +874,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
 		riscv_fill_vendor_ext_list(cpu);
 	}
 
-	if (riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR) &&
-	    has_thead_homogeneous_vlenb() < 0) {
+	if (has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
 		pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
 		disable_xtheadvector();
 	}
@@ -932,7 +931,8 @@ void __init riscv_fill_hwcap(void)
 		elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
 	}
 
-	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) {
+	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) ||
+	    has_xtheadvector_no_alternatives()) {
 		/*
 		 * This cannot fail when called on the boot hart
 		 */
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
index 6afe80c7f03ab..99972a48e86bc 100644
--- a/arch/riscv/kernel/kernel_mode_vector.c
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested)
 
 	/* Transfer the ownership of V from user to kernel, then save */
 	riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
-	if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
+	if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) {
 		uvstate = &current->thread.vstate;
 		__riscv_v_vstate_save(uvstate, uvstate->datap);
 	}
@@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
 		return;
 
 	depth = riscv_v_ctx_get_depth();
-	if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
+	if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY))
 		riscv_preempt_v_set_dirty();
 
 	riscv_v_ctx_depth_inc();
@@ -208,7 +208,7 @@ void kernel_vector_begin(void)
 {
 	bool nested = false;
 
-	if (WARN_ON(!has_vector()))
+	if (WARN_ON(!(has_vector() || has_xtheadvector())))
 		return;
 
 	BUG_ON(!may_use_simd());
@@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin);
  */
 void kernel_vector_end(void)
 {
-	if (WARN_ON(!has_vector()))
+	if (WARN_ON(!(has_vector() || has_xtheadvector())))
 		return;
 
 	riscv_v_disable();
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 58b6482c2bf66..6534264dfce26 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -190,7 +190,7 @@ void flush_thread(void)
 void arch_release_task_struct(struct task_struct *tsk)
 {
 	/* Free the vector context of datap. */
-	if (has_vector())
+	if (has_vector() || has_xtheadvector())
 		riscv_v_thread_free(tsk);
 }
 
@@ -240,7 +240,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		p->thread.s[0] = 0;
 	}
 	p->thread.riscv_v_flags = 0;
-	if (has_vector())
+	if (has_vector() || has_xtheadvector())
 		riscv_v_thread_alloc(p);
 	p->thread.ra = (unsigned long)ret_from_fork;
 	p->thread.sp = (unsigned long)childregs; /* kernel sp */
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index dcd2824194561..94e905eea1dee 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -189,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs,
 
 			return 0;
 		case RISCV_V_MAGIC:
-			if (!has_vector() || !riscv_v_vstate_query(regs) ||
+			if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) ||
 			    size != riscv_v_sc_size)
 				return -EINVAL;
 
@@ -211,7 +211,7 @@ static size_t get_rt_frame_size(bool cal_all)
 
 	frame_size = sizeof(*frame);
 
-	if (has_vector()) {
+	if (has_vector() || has_xtheadvector()) {
 		if (cal_all || riscv_v_vstate_query(task_pt_regs(current)))
 			total_context_size += riscv_v_sc_size;
 	}
@@ -284,7 +284,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
 	if (has_fpu())
 		err |= save_fp_state(regs, &sc->sc_fpregs);
 	/* Save the vector state. */
-	if (has_vector() && riscv_v_vstate_query(regs))
+	if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs))
 		err |= save_v_state(regs, (void __user **)&sc_ext_ptr);
 	/* Write zero to fp-reserved space and check it on restore_sigcontext */
 	err |= __put_user(0, &sc->sc_extdesc.reserved);
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 6ed16a5f3e87f..aba5805119c33 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -63,7 +63,7 @@ int riscv_v_setup_vsize(void)
 
 void __init riscv_v_setup_ctx_cache(void)
 {
-	if (!has_vector())
+	if (!(has_vector() || has_xtheadvector()))
 		return;
 
 	riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
@@ -183,7 +183,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
 	u32 __user *epc = (u32 __user *)regs->epc;
 	u32 insn = (u32)regs->badaddr;
 
-	if (!has_vector())
+	if (!(has_vector() || has_xtheadvector()))
 		return false;
 
 	/* Do not handle if V is not supported, or disabled */
@@ -226,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk)
 	bool inherit;
 	int cur, next;
 
-	if (!has_vector())
+	if (!(has_vector() || has_xtheadvector()))
 		return;
 
 	next = riscv_v_ctrl_get_next(tsk);
@@ -248,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk)
 
 long riscv_v_vstate_ctrl_get_current(void)
 {
-	if (!has_vector())
+	if (!(has_vector() || has_xtheadvector()))
 		return -EINVAL;
 
 	return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK;
@@ -259,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg)
 	bool inherit;
 	int cur, next;
 
-	if (!has_vector())
+	if (!(has_vector() || has_xtheadvector()))
 		return -EINVAL;
 
 	if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK)
@@ -309,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = {
 
 static int __init riscv_v_sysctl_init(void)
 {
-	if (has_vector())
+	if (has_vector() || has_xtheadvector())
 		if (!register_sysctl("abi", riscv_v_default_vstate_table))
 			return -EINVAL;
 	return 0;

From a5ea53da65c588339890c825e63c0da5baef6897 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:16 -0800
Subject: [PATCH 125/368] riscv: hwprobe: Add thead vendor extension probing

Add a new hwprobe key "RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0" which
allows userspace to probe for the new RISCV_ISA_VENDOR_EXT_XTHEADVECTOR
vendor extension.

This new key will allow userspace code to probe which thead vendor
extensions are supported. This API is modeled to be consistent with
RISCV_HWPROBE_KEY_IMA_EXT_0. Each bit set in the returned bitmask
corresponds to a thead vendor extension supported by every CPU in the
given cpumask. Just like RISCV_HWPROBE_KEY_IMA_EXT_0, this allows a
userspace program to determine all of the supported thead vendor
extensions in one call.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Evan Green <evan@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-10-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/hwprobe.h              |  5 ++-
 .../asm/vendor_extensions/thead_hwprobe.h     | 19 ++++++++++
 .../asm/vendor_extensions/vendor_hwprobe.h    | 37 +++++++++++++++++++
 arch/riscv/include/uapi/asm/hwprobe.h         |  3 +-
 arch/riscv/include/uapi/asm/vendor/thead.h    |  3 ++
 arch/riscv/kernel/sys_hwprobe.c               |  5 +++
 arch/riscv/kernel/vendor_extensions/Makefile  |  1 +
 .../kernel/vendor_extensions/thead_hwprobe.c  | 19 ++++++++++
 8 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h
 create mode 100644 arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h
 create mode 100644 arch/riscv/include/uapi/asm/vendor/thead.h
 create mode 100644 arch/riscv/kernel/vendor_extensions/thead_hwprobe.c

diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 1ce1df6d0ff3c..dd624523981c8 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
- * Copyright 2023 Rivos, Inc
+ * Copyright 2023-2024 Rivos, Inc
  */
 
 #ifndef _ASM_HWPROBE_H
@@ -8,7 +8,7 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 10
+#define RISCV_HWPROBE_MAX_KEY 11
 
 static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 {
@@ -21,6 +21,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key)
 	case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
 	case RISCV_HWPROBE_KEY_IMA_EXT_0:
 	case RISCV_HWPROBE_KEY_CPUPERF_0:
+	case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
 		return true;
 	}
 
diff --git a/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h
new file mode 100644
index 0000000000000..65a9c5612466d
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H
+
+#include <linux/cpumask.h>
+
+#include <uapi/asm/hwprobe.h>
+
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
+void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus);
+#else
+static inline void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair,
+						  const struct cpumask *cpus)
+{
+	pair->value = 0;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h
new file mode 100644
index 0000000000000..6b9293e984a92
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2024 Rivos, Inc
+ */
+
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_VENDOR_HWPROBE_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_VENDOR_HWPROBE_H
+
+#include <asm/cpufeature.h>
+
+#define VENDOR_EXT_KEY(ext)								\
+	do {										\
+		if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_VENDOR_EXT_##ext)) \
+			pair->value |= RISCV_HWPROBE_VENDOR_EXT_##ext;			\
+		else									\
+			missing |= RISCV_HWPROBE_VENDOR_EXT_##ext;			\
+	} while (false)
+
+/*
+ * For each CPU in cpus, record the extensions it has in (pair)->value and
+ * those it lacks in missing, then clear the missing ones from (pair)->value.
+ *
+ * _extension_checks is a C block of VENDOR_EXT_KEY() statements; they use
+ * pair, isainfo and missing by name, so the argument must be named "pair".
+ */
+#define VENDOR_EXTENSION_SUPPORTED(pair, cpus, per_hart_vendor_bitmap, _extension_checks)	\
+	do {											\
+		int cpu;									\
+		u64 missing = 0;								\
+		for_each_cpu(cpu, (cpus)) {							\
+			struct riscv_isavendorinfo *isainfo = &(per_hart_vendor_bitmap)[cpu];	\
+			_extension_checks							\
+		}										\
+		(pair)->value &= ~missing;							\
+	} while (false)
+
+#endif /* _ASM_RISCV_VENDOR_EXTENSIONS_VENDOR_HWPROBE_H */
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 3af142b99f778..c3c1cc951cb94 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
- * Copyright 2023 Rivos, Inc
+ * Copyright 2023-2024 Rivos, Inc
  */
 
 #ifndef _UAPI_ASM_HWPROBE_H
@@ -94,6 +94,7 @@ struct riscv_hwprobe {
 #define		RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW		2
 #define		RISCV_HWPROBE_MISALIGNED_VECTOR_FAST		3
 #define		RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED	4
+#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0	11
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
 /* Flags */
diff --git a/arch/riscv/include/uapi/asm/vendor/thead.h b/arch/riscv/include/uapi/asm/vendor/thead.h
new file mode 100644
index 0000000000000..43790ebe5faf3
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/vendor/thead.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#define		RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR	(1 << 0)
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 9050f32462645..13eff75d78a8b 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -15,6 +15,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/vector.h>
+#include <asm/vendor_extensions/thead_hwprobe.h>
 #include <vdso/vsyscall.h>
 
 
@@ -286,6 +287,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
 		pair->value = riscv_timebase;
 		break;
 
+	case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
+		hwprobe_isa_vendor_ext_thead_0(pair, cpus);
+		break;
+
 	/*
 	 * For forward compatibility, unknown keys don't fail the whole
 	 * call, but get their element key set to -1 and value set to 0
diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile
index 353522cb3bf09..866414c81a9f5 100644
--- a/arch/riscv/kernel/vendor_extensions/Makefile
+++ b/arch/riscv/kernel/vendor_extensions/Makefile
@@ -2,3 +2,4 @@
 
 obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES)	+= andes.o
 obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD)	+= thead.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD)	+= thead_hwprobe.o
diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c
new file mode 100644
index 0000000000000..2eba340117869
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/vendor_extensions/thead.h>
+#include <asm/vendor_extensions/thead_hwprobe.h>
+#include <asm/vendor_extensions/vendor_hwprobe.h>
+
+#include <linux/cpumask.h>
+#include <linux/types.h>
+
+#include <uapi/asm/hwprobe.h>
+#include <uapi/asm/vendor/thead.h>
+
+void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus)
+{
+	VENDOR_EXTENSION_SUPPORTED(pair, cpus,
+				   riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, {
+		VENDOR_EXT_KEY(XTHEADVECTOR);
+	});
+}

From 7fa00fd6ff5366b50dcba2525b9743e1612da2aa Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:17 -0800
Subject: [PATCH 126/368] riscv: hwprobe: Document thead vendor extensions and
 xtheadvector extension

Document support for thead vendor extensions using the key
RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0, and for the xtheadvector extension
using the bit RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR within that key's value.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Evan Green <evan@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-11-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/arch/riscv/hwprobe.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index 955fbcd19ce90..f273ea15a8e83 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -293,3 +293,13 @@ The following keys are defined:
 
   * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are
     not supported at all and will generate a misaligned address fault.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the
+  thead vendor extensions that are compatible with the
+  :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA` base system behavior.
+
+  * T-HEAD
+
+    * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor
+      extension is supported, as defined in the T-Head ISA extensions spec
+      starting from commit a18c801634 ("Add T-Head VECTOR vendor extension. ").

From 57d7713af93e4b7344d3022fad9ddf0f10f815ec Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:18 -0800
Subject: [PATCH 127/368] selftests: riscv: Fix vector tests

Overhaul the riscv vector tests to use kselftest_harness to help the
test cases correctly report the results and decouple the individual test
cases from each other. With this refactoring, only run the test cases if
vector is reported and properly report the test case as skipped
otherwise. The v_initval_nolibc test was previously not checking if
vector was supported and used a function (malloc) which invalidates
the state of the vector registers.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-12-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../testing/selftests/riscv/vector/.gitignore |   3 +-
 tools/testing/selftests/riscv/vector/Makefile |  17 +-
 .../riscv/vector/v_exec_initval_nolibc.c      |  85 ++++++
 .../selftests/riscv/vector/v_helpers.c        |  57 ++++
 .../selftests/riscv/vector/v_helpers.h        |   6 +
 .../selftests/riscv/vector/v_initval.c        |  16 +
 .../selftests/riscv/vector/v_initval_nolibc.c |  68 -----
 .../selftests/riscv/vector/vstate_prctl.c     | 278 ++++++++++--------
 8 files changed, 337 insertions(+), 193 deletions(-)
 create mode 100644 tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
 create mode 100644 tools/testing/selftests/riscv/vector/v_helpers.c
 create mode 100644 tools/testing/selftests/riscv/vector/v_helpers.h
 create mode 100644 tools/testing/selftests/riscv/vector/v_initval.c
 delete mode 100644 tools/testing/selftests/riscv/vector/v_initval_nolibc.c

diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore
index 9ae7964491d50..7d9c87cd06497 100644
--- a/tools/testing/selftests/riscv/vector/.gitignore
+++ b/tools/testing/selftests/riscv/vector/.gitignore
@@ -1,3 +1,4 @@
 vstate_exec_nolibc
 vstate_prctl
-v_initval_nolibc
+v_initval
+v_exec_initval_nolibc
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index bfff0ff4f3bef..6f7497f4e7b30 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -2,18 +2,27 @@
 # Copyright (C) 2021 ARM Limited
 # Originally tools/testing/arm64/abi/Makefile
 
-TEST_GEN_PROGS := vstate_prctl v_initval_nolibc
-TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc
+TEST_GEN_PROGS := v_initval vstate_prctl
+TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc
 
 include ../../lib.mk
 
-$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S
+$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S
+	$(CC) -static -c -o$@ $(CFLAGS) $^
+
+$(OUTPUT)/v_helpers.o: v_helpers.c
+	$(CC) -static -c -o$@ $(CFLAGS) $^
+
+$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
 	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
 
 $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c
 	$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
 		-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
 
-$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c
+$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
 	$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
 		-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
new file mode 100644
index 0000000000000..4a39cab29c34d
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Get values of vector registers as soon as the program starts to test if the
+ * kernel is properly cleaning the values before starting a new program. Vector
+ * registers are caller saved, so no function calls may happen before reading
+ * the values. To further ensure consistency, this file is compiled without
+ * libc and without auto-vectorization.
+ *
+ * To be "clean" all values must be either all ones or all zeroes.
+ */
+
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
+
+int main(int argc, char **argv)
+{
+	char prev_value = 0, value;
+	unsigned long vl;
+	int first = 1;
+
+	asm volatile (
+		".option push\n\t"
+		".option arch, +v\n\t"
+		"vsetvli	%[vl], x0, e8, m1, ta, ma\n\t"
+		".option pop\n\t"
+		: [vl] "=r" (vl)
+	);
+
+#define CHECK_VECTOR_REGISTER(register) ({					\
+	for (int i = 0; i < vl; i++) {						\
+		asm volatile (							\
+			".option push\n\t"					\
+			".option arch, +v\n\t"					\
+			"vmv.x.s %0, " __stringify(register) "\n\t"		\
+			"vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \
+			".option pop\n\t"					\
+			: "=r" (value));					\
+		if (first) {							\
+			first = 0;						\
+		} else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \
+			printf("Register " __stringify(register)		\
+				" values not clean! value: %u\n", value);	\
+			exit(-1);						\
+		}								\
+		prev_value = value;						\
+	}									\
+})
+
+	CHECK_VECTOR_REGISTER(v0);
+	CHECK_VECTOR_REGISTER(v1);
+	CHECK_VECTOR_REGISTER(v2);
+	CHECK_VECTOR_REGISTER(v3);
+	CHECK_VECTOR_REGISTER(v4);
+	CHECK_VECTOR_REGISTER(v5);
+	CHECK_VECTOR_REGISTER(v6);
+	CHECK_VECTOR_REGISTER(v7);
+	CHECK_VECTOR_REGISTER(v8);
+	CHECK_VECTOR_REGISTER(v9);
+	CHECK_VECTOR_REGISTER(v10);
+	CHECK_VECTOR_REGISTER(v11);
+	CHECK_VECTOR_REGISTER(v12);
+	CHECK_VECTOR_REGISTER(v13);
+	CHECK_VECTOR_REGISTER(v14);
+	CHECK_VECTOR_REGISTER(v15);
+	CHECK_VECTOR_REGISTER(v16);
+	CHECK_VECTOR_REGISTER(v17);
+	CHECK_VECTOR_REGISTER(v18);
+	CHECK_VECTOR_REGISTER(v19);
+	CHECK_VECTOR_REGISTER(v20);
+	CHECK_VECTOR_REGISTER(v21);
+	CHECK_VECTOR_REGISTER(v22);
+	CHECK_VECTOR_REGISTER(v23);
+	CHECK_VECTOR_REGISTER(v24);
+	CHECK_VECTOR_REGISTER(v25);
+	CHECK_VECTOR_REGISTER(v26);
+	CHECK_VECTOR_REGISTER(v27);
+	CHECK_VECTOR_REGISTER(v28);
+	CHECK_VECTOR_REGISTER(v29);
+	CHECK_VECTOR_REGISTER(v30);
+	CHECK_VECTOR_REGISTER(v31);
+
+#undef CHECK_VECTOR_REGISTER
+
+	return 0;
+}
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c
new file mode 100644
index 0000000000000..d50f4dfbf9e56
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_helpers.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../hwprobe/hwprobe.h"
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+bool is_vector_supported(void)
+{
+	struct riscv_hwprobe pair;
+
+	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+	riscv_hwprobe(&pair, 1, 0, NULL, 0);
+	return pair.value & RISCV_HWPROBE_EXT_ZVE32X;
+}
+
+int launch_test(char *next_program, int test_inherit)
+{
+	char *exec_argv[3], *exec_envp[1];
+	int rc, pid, status;
+
+	pid = fork();
+	if (pid < 0) {
+		printf("fork failed %d", pid);
+		return -1;
+	}
+
+	if (!pid) {
+		exec_argv[0] = next_program;
+		exec_argv[1] = test_inherit != 0 ? "x" : NULL;
+		exec_argv[2] = NULL;
+		exec_envp[0] = NULL;
+		/* launch the program again to check inherit */
+		rc = execve(next_program, exec_argv, exec_envp);
+		if (rc) {
+			perror("execve");
+			printf("child execve failed %d\n", rc);
+			exit(-1);
+		}
+	}
+
+	rc = waitpid(-1, &status, 0);
+	if (rc < 0) {
+		printf("waitpid failed\n");
+		return -3;
+	}
+
+	if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
+	    WIFSIGNALED(status)) {
+		printf("child exited abnormally\n");
+		return -4;
+	}
+
+	return WEXITSTATUS(status);
+}
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h
new file mode 100644
index 0000000000000..faeeeb625b6ee
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_helpers.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <stdbool.h>
+
+bool is_vector_supported(void);
+
+int launch_test(char *next_program, int test_inherit);
diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c
new file mode 100644
index 0000000000000..f38b5797fa317
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_initval.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest_harness.h"
+#include "v_helpers.h"
+
+#define NEXT_PROGRAM "./v_exec_initval_nolibc"
+
+TEST(v_initval)
+{
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
+
+	ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
deleted file mode 100644
index 1dd94197da30c..0000000000000
--- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include "../../kselftest.h"
-#define MAX_VSIZE	(8192 * 32)
-
-void dump(char *ptr, int size)
-{
-	int i = 0;
-
-	for (i = 0; i < size; i++) {
-		if (i != 0) {
-			if (i % 16 == 0)
-				printf("\n");
-			else if (i % 8 == 0)
-				printf("  ");
-		}
-		printf("%02x ", ptr[i]);
-	}
-	printf("\n");
-}
-
-int main(void)
-{
-	int i;
-	unsigned long vl;
-	char *datap, *tmp;
-
-	datap = malloc(MAX_VSIZE);
-	if (!datap) {
-		ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
-		exit(-1);
-	}
-
-	tmp = datap;
-	asm volatile (
-		".option push\n\t"
-		".option arch, +v\n\t"
-		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
-		"vse8.v		v0, (%2)\n\t"
-		"add		%1, %2, %0\n\t"
-		"vse8.v		v8, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v16, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v24, (%1)\n\t"
-		".option pop\n\t"
-		: "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory");
-
-	ksft_print_msg("vl = %lu\n", vl);
-
-	if (datap[0] != 0x00 && datap[0] != 0xff) {
-		ksft_test_result_fail("v-regesters are not properly initialized\n");
-		dump(datap, vl * 4);
-		exit(-1);
-	}
-
-	for (i = 1; i < vl * 4; i++) {
-		if (datap[i] != datap[0]) {
-			ksft_test_result_fail("detect stale values on v-regesters\n");
-			dump(datap, vl * 4);
-			exit(-2);
-		}
-	}
-
-	free(datap);
-	ksft_exit_pass();
-	return 0;
-}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 895177f6bf4c8..2fc86924bf426 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -3,50 +3,13 @@
 #include <unistd.h>
 #include <errno.h>
 #include <sys/wait.h>
+#include <sys/types.h>
+#include <stdlib.h>
 
-#include "../hwprobe/hwprobe.h"
-#include "../../kselftest.h"
+#include "../../kselftest_harness.h"
+#include "v_helpers.h"
 
 #define NEXT_PROGRAM "./vstate_exec_nolibc"
-static int launch_test(int test_inherit)
-{
-	char *exec_argv[3], *exec_envp[1];
-	int rc, pid, status;
-
-	pid = fork();
-	if (pid < 0) {
-		ksft_test_result_fail("fork failed %d", pid);
-		return -1;
-	}
-
-	if (!pid) {
-		exec_argv[0] = NEXT_PROGRAM;
-		exec_argv[1] = test_inherit != 0 ? "x" : NULL;
-		exec_argv[2] = NULL;
-		exec_envp[0] = NULL;
-		/* launch the program again to check inherit */
-		rc = execve(NEXT_PROGRAM, exec_argv, exec_envp);
-		if (rc) {
-			perror("execve");
-			ksft_test_result_fail("child execve failed %d\n", rc);
-			exit(-1);
-		}
-	}
-
-	rc = waitpid(-1, &status, 0);
-	if (rc < 0) {
-		ksft_test_result_fail("waitpid failed\n");
-		return -3;
-	}
-
-	if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
-	    WIFSIGNALED(status)) {
-		ksft_test_result_fail("child exited abnormally\n");
-		return -4;
-	}
-
-	return WEXITSTATUS(status);
-}
 
 int test_and_compare_child(long provided, long expected, int inherit)
 {
@@ -54,128 +17,203 @@ int test_and_compare_child(long provided, long expected, int inherit)
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, provided);
 	if (rc != 0) {
-		ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n",
-				      provided, rc);
+		printf("prctl with provided arg %lx failed with code %d\n",
+		       provided, rc);
 		return -1;
 	}
-	rc = launch_test(inherit);
+	rc = launch_test(NEXT_PROGRAM, inherit);
 	if (rc != expected) {
-		ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
-				      expected);
+		printf("Test failed, check %d != %ld\n", rc, expected);
 		return -2;
 	}
 	return 0;
 }
 
-#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT	0
-#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT	2
+#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0
+#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2
 
-int main(void)
+TEST(get_control_no_v)
 {
-	struct riscv_hwprobe pair;
-	long flag, expected;
 	long rc;
 
-	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
-	rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
-	if (rc < 0) {
-		ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
-		return -1;
-	}
+	if (is_vector_supported())
+		SKIP(return, "Test expects vector to be not supported");
 
-	if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) {
-		ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n");
-		return -2;
-	}
+	rc = prctl(PR_RISCV_V_GET_CONTROL);
+	EXPECT_EQ(-1, rc)
+	TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X");
+	EXPECT_EQ(EINVAL, errno)
+	TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X");
+}
 
-	if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) {
-		rc = prctl(PR_RISCV_V_GET_CONTROL);
-		if (rc != -1 || errno != EINVAL) {
-			ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n");
-			return -3;
-		}
-
-		rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
-		if (rc != -1 || errno != EINVAL) {
-			ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n");
-			return -4;
-		}
-
-		ksft_test_result_skip("Vector not supported\n");
-		return 0;
-	}
+TEST(set_control_no_v)
+{
+	long rc;
+
+	if (is_vector_supported())
+		SKIP(return, "Test expects vector to be not supported");
+
+	rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
+	EXPECT_EQ(-1, rc)
+	TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X");
+	EXPECT_EQ(EINVAL, errno)
+	TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X");
+}
+
+TEST(vstate_on_current)
+{
+	long flag;
+	long rc;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	flag = PR_RISCV_V_VSTATE_CTRL_ON;
 	rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
-	if (rc != 0) {
-		ksft_test_result_fail("Enabling V for current should always success\n");
-		return -5;
-	}
+	EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always success");
+}
+
+TEST(vstate_off_eperm)
+{
+	long flag;
+	long rc;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF;
 	rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
-	if (rc != -1 || errno != EPERM) {
-		ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n",
-				      errno);
-		return -5;
-	}
+	EXPECT_EQ(EPERM, errno)
+	TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno);
+	EXPECT_EQ(-1, rc)
+	TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno);
+}
+
+TEST(vstate_on_no_nesting)
+{
+	long flag;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	/* Turn on next's vector explicitly and test */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
-	if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0))
-		return -6;
+
+	EXPECT_EQ(0,
+		  test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0));
+}
+
+TEST(vstate_off_nesting)
+{
+	long flag;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	/* Turn off next's vector explicitly and test */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
-	if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0))
-		return -7;
+
+	EXPECT_EQ(0,
+		  test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1));
+}
+
+TEST(vstate_on_inherit_no_nesting)
+{
+	long flag, expected;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
+
+	/* Turn on next's vector explicitly and test no inherit */
+	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+	expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
+
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0));
+}
+
+TEST(vstate_on_inherit)
+{
+	long flag, expected;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	/* Turn on next's vector explicitly and test inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
-	if (test_and_compare_child(flag, expected, 0))
-		return -8;
 
-	if (test_and_compare_child(flag, expected, 1))
-		return -9;
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1));
+}
+
+TEST(vstate_off_inherit_no_nesting)
+{
+	long flag, expected;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
+
+	/* Turn off next's vector explicitly and test no inherit */
+	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+	expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
+
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0));
+}
+
+TEST(vstate_off_inherit)
+{
+	long flag, expected;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	/* Turn off next's vector explicitly and test inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
-	if (test_and_compare_child(flag, expected, 0))
-		return -10;
 
-	if (test_and_compare_child(flag, expected, 1))
-		return -11;
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1));
+}
+
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_1)
+{
+	int rc;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
-	/* arguments should fail with EINVAL */
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+	EXPECT_EQ(-1, rc);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_2)
+{
+	int rc;
+
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+	EXPECT_EQ(-1, rc);
+	EXPECT_EQ(EINVAL, errno);
+}
 
-	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_3)
+{
+	int rc;
 
-	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+	if (!is_vector_supported())
+		SKIP(return, "Vector not supported");
 
-	ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n");
-	ksft_exit_pass();
-	return 0;
+	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
+	EXPECT_EQ(-1, rc);
+	EXPECT_EQ(EINVAL, errno);
 }
+
+TEST_HARNESS_MAIN

From c384c5d4a2aed5b6a10de1fcc2f5b46ad4aeeea8 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:19 -0800
Subject: [PATCH 128/368] selftests: riscv: Support xtheadvector in vector
 tests

Extend existing vector tests to be compatible with the xtheadvector
instructions.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-13-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../riscv/vector/v_exec_initval_nolibc.c      | 23 +++--
 .../selftests/riscv/vector/v_helpers.c        | 17 +++-
 .../selftests/riscv/vector/v_helpers.h        |  4 +-
 .../selftests/riscv/vector/v_initval.c        | 12 ++-
 .../riscv/vector/vstate_exec_nolibc.c         | 20 +++--
 .../selftests/riscv/vector/vstate_prctl.c     | 89 ++++++++++++-------
 6 files changed, 113 insertions(+), 52 deletions(-)

diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
index 4a39cab29c34d..35c0812e32de0 100644
--- a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
@@ -18,13 +18,22 @@ int main(int argc, char **argv)
 	unsigned long vl;
 	int first = 1;
 
-	asm volatile (
-		".option push\n\t"
-		".option arch, +v\n\t"
-		"vsetvli	%[vl], x0, e8, m1, ta, ma\n\t"
-		".option pop\n\t"
-		: [vl] "=r" (vl)
-	);
+	if (argc > 2 && !strcmp(argv[2], "x")) /* "x" in argv[2] selects xtheadvector */
+		asm volatile (
+			// 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+			// vsetvli	t4, x0, e8, m1, d1
+			".4byte		0b00000000000000000111111011010111\n\t"
+			"mv		%[vl], t4\n\t"
+			: [vl] "=r" (vl) : : "t4"
+		);
+	else
+		asm volatile (
+			".option push\n\t"
+			".option arch, +v\n\t"
+			"vsetvli	%[vl], x0, e8, m1, ta, ma\n\t"
+			".option pop\n\t"
+			: [vl] "=r" (vl)
+		);
 
 #define CHECK_VECTOR_REGISTER(register) ({					\
 	for (int i = 0; i < vl; i++) {						\
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c
index d50f4dfbf9e56..01a8799dcb786 100644
--- a/tools/testing/selftests/riscv/vector/v_helpers.c
+++ b/tools/testing/selftests/riscv/vector/v_helpers.c
@@ -1,12 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include "../hwprobe/hwprobe.h"
+#include <asm/vendor/thead.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <sys/wait.h>
 
+bool is_xtheadvector_supported(void)
+{
+	struct riscv_hwprobe pair;
+
+	pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0;
+	riscv_hwprobe(&pair, 1, 0, NULL, 0);
+	return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR;
+}
+
 bool is_vector_supported(void)
 {
 	struct riscv_hwprobe pair;
@@ -16,9 +26,9 @@ bool is_vector_supported(void)
 	return pair.value & RISCV_HWPROBE_EXT_ZVE32X;
 }
 
-int launch_test(char *next_program, int test_inherit)
+int launch_test(char *next_program, int test_inherit, int xtheadvector)
 {
-	char *exec_argv[3], *exec_envp[1];
+	char *exec_argv[4], *exec_envp[1];
 	int rc, pid, status;
 
 	pid = fork();
@@ -30,7 +40,8 @@ int launch_test(char *next_program, int test_inherit)
 	if (!pid) {
 		exec_argv[0] = next_program;
 		exec_argv[1] = test_inherit != 0 ? "x" : NULL;
-		exec_argv[2] = NULL;
+		exec_argv[2] = xtheadvector != 0 ? "x" : NULL;
+		exec_argv[3] = NULL;
 		exec_envp[0] = NULL;
 		/* launch the program again to check inherit */
 		rc = execve(next_program, exec_argv, exec_envp);
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h
index faeeeb625b6ee..763cddfe26dad 100644
--- a/tools/testing/selftests/riscv/vector/v_helpers.h
+++ b/tools/testing/selftests/riscv/vector/v_helpers.h
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <stdbool.h>
 
+bool is_xtheadvector_supported(void);
+
 bool is_vector_supported(void);
 
-int launch_test(char *next_program, int test_inherit);
+int launch_test(char *next_program, int test_inherit, int xtheadvector);
diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c
index f38b5797fa317..be9e1d18ad295 100644
--- a/tools/testing/selftests/riscv/vector/v_initval.c
+++ b/tools/testing/selftests/riscv/vector/v_initval.c
@@ -7,10 +7,16 @@
 
 TEST(v_initval)
 {
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
+	int xtheadvector = 0;
 
-	ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0));
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
+
+	ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector));
 }
 
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 1f9969bed2355..7b7d6f21acb45 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -6,13 +6,16 @@
 
 int main(int argc, char **argv)
 {
-	int rc, pid, status, test_inherit = 0;
+	int rc, pid, status, test_inherit = 0, xtheadvector = 0;
 	long ctrl, ctrl_c;
 	char *exec_argv[2], *exec_envp[2];
 
-	if (argc > 1)
+	if (argc > 1 && !strcmp(argv[1], "x")) /* strcmp() returns 0 on a match */
 		test_inherit = 1;
 
+	if (argc > 2 && !strcmp(argv[2], "x")) /* "x" in argv[2] selects xtheadvector */
+		xtheadvector = 1;
+
 	ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
 	if (ctrl < 0) {
 		puts("PR_RISCV_V_GET_CONTROL is not supported\n");
@@ -53,11 +56,14 @@ int main(int argc, char **argv)
 				puts("child's vstate_ctrl not equal to parent's\n");
 				exit(-1);
 			}
-			asm volatile (".option push\n\t"
-				      ".option arch, +v\n\t"
-				      "vsetvli x0, x0, e32, m8, ta, ma\n\t"
-				      ".option pop\n\t"
-				      );
+			if (xtheadvector)
+				/* vsetvli t4, x0, e8, m1, d1 (xtheadvector encoding) writes t4 */
+				asm volatile (".4byte	0x00007ed7" : : : "t4");
+			else
+				asm volatile (".option push\n\t"
+					".option arch, +v\n\t"
+					"vsetvli x0, x0, e32, m8, ta, ma\n\t"
+					".option pop\n\t");
 			exit(ctrl);
 		}
 	}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 2fc86924bf426..62fbb17a05566 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -11,7 +11,7 @@
 
 #define NEXT_PROGRAM "./vstate_exec_nolibc"
 
-int test_and_compare_child(long provided, long expected, int inherit)
+int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector)
 {
 	int rc;
 
@@ -21,7 +21,7 @@ int test_and_compare_child(long provided, long expected, int inherit)
 		       provided, rc);
 		return -1;
 	}
-	rc = launch_test(NEXT_PROGRAM, inherit);
+	rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector);
 	if (rc != expected) {
 		printf("Test failed, check %d != %ld\n", rc, expected);
 		return -2;
@@ -36,7 +36,7 @@ TEST(get_control_no_v)
 {
 	long rc;
 
-	if (is_vector_supported())
+	if (is_vector_supported() || is_xtheadvector_supported())
 		SKIP(return, "Test expects vector to be not supported");
 
 	rc = prctl(PR_RISCV_V_GET_CONTROL);
@@ -50,7 +50,7 @@ TEST(set_control_no_v)
 {
 	long rc;
 
-	if (is_vector_supported())
+	if (is_vector_supported() || is_xtheadvector_supported())
 		SKIP(return, "Test expects vector to be not supported");
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
@@ -65,12 +65,12 @@ TEST(vstate_on_current)
 	long flag;
 	long rc;
 
-	if (!is_vector_supported())
+	if (!is_vector_supported() && !is_xtheadvector_supported())
 		SKIP(return, "Vector not supported");
 
 	flag = PR_RISCV_V_VSTATE_CTRL_ON;
 	rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
-	EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always success");
+	EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed");
 }
 
 TEST(vstate_off_eperm)
@@ -78,7 +78,7 @@ TEST(vstate_off_eperm)
 	long flag;
 	long rc;
 
-	if (!is_vector_supported())
+	if (!is_vector_supported() && !is_xtheadvector_supported())
 		SKIP(return, "Vector not supported");
 
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF;
@@ -92,89 +92,116 @@ TEST(vstate_off_eperm)
 TEST(vstate_on_no_nesting)
 {
 	long flag;
+	int xtheadvector = 0;
 
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn on next's vector explicitly and test */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 
-	EXPECT_EQ(0,
-		  test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0));
+	EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector));
 }
 
 TEST(vstate_off_nesting)
 {
 	long flag;
+	int xtheadvector = 0;
 
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn off next's vector explicitly and test */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 
-	EXPECT_EQ(0,
-		  test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1));
+	EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector));
 }
 
 TEST(vstate_on_inherit_no_nesting)
 {
 	long flag, expected;
+	int xtheadvector = 0;
 
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn on next's vector explicitly and test no inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
 
-	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0));
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector));
 }
 
 TEST(vstate_on_inherit)
 {
 	long flag, expected;
+	int xtheadvector = 0;
 
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn on next's vector explicitly and test inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
 
-	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1));
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector));
 }
 
 TEST(vstate_off_inherit_no_nesting)
 {
 	long flag, expected;
+	int xtheadvector = 0;
 
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
-
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 	/* Turn off next's vector explicitly and test no inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
 
-	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0));
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector));
 }
 
 TEST(vstate_off_inherit)
 {
 	long flag, expected;
+	int xtheadvector = 0;
 
-	if (!is_vector_supported())
-		SKIP(return, "Vector not supported");
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn off next's vector explicitly and test inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
 
-	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1));
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector));
 }
 
 /* arguments should fail with EINVAL */
@@ -182,7 +209,7 @@ TEST(inval_set_control_1)
 {
 	int rc;
 
-	if (!is_vector_supported())
+	if (!is_vector_supported() && !is_xtheadvector_supported())
 		SKIP(return, "Vector not supported");
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0);
@@ -195,7 +222,7 @@ TEST(inval_set_control_2)
 {
 	int rc;
 
-	if (!is_vector_supported())
+	if (!is_vector_supported() && !is_xtheadvector_supported())
 		SKIP(return, "Vector not supported");
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3);
@@ -208,7 +235,7 @@ TEST(inval_set_control_3)
 {
 	int rc;
 
-	if (!is_vector_supported())
+	if (!is_vector_supported() && !is_xtheadvector_supported())
 		SKIP(return, "Vector not supported");
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);

From 4bf97069239bcfca9840936313c7ac35a6e04488 Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Wed, 13 Nov 2024 18:21:20 -0800
Subject: [PATCH 129/368] riscv: Add ghostwrite vulnerability

Follow the patterns of the other architectures that use
GENERIC_CPU_VULNERABILITIES for riscv to introduce the ghostwrite
vulnerability and mitigation. The mitigation is to disable all vector
which is accomplished by clearing the bit from the cpufeature field.

Ghostwrite only affects thead c9xx CPUs that implement xtheadvector, so
the vulnerability will only be mitigated on these CPUs.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-14-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig.errata            | 11 +++++
 arch/riscv/errata/thead/errata.c     | 28 +++++++++++++
 arch/riscv/include/asm/bugs.h        | 22 ++++++++++
 arch/riscv/include/asm/errata_list.h |  3 +-
 arch/riscv/kernel/Makefile           |  2 +
 arch/riscv/kernel/bugs.c             | 60 ++++++++++++++++++++++++++++
 arch/riscv/kernel/cpufeature.c       | 10 ++++-
 drivers/base/cpu.c                   |  3 ++
 include/linux/cpu.h                  |  1 +
 9 files changed, 138 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/include/asm/bugs.h
 create mode 100644 arch/riscv/kernel/bugs.c

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 2acc7d876e1fb..e318119d570de 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -119,4 +119,15 @@ config ERRATA_THEAD_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_GHOSTWRITE
+	bool "Apply T-Head Ghostwrite errata"
+	depends on ERRATA_THEAD && RISCV_ISA_XTHEADVECTOR
+	default y
+	help
+	  The T-Head C9xx cores have a vulnerability in the xtheadvector
+	  instruction set. When this errata is enabled, the CPUs will be probed
+	  to determine if they are vulnerable and disable xtheadvector.
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index f5120e07c3182..5cc008ab41a87 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -10,6 +10,7 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <asm/alternative.h>
+#include <asm/bugs.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/dma-noncoherent.h>
@@ -142,6 +143,31 @@ static bool errata_probe_pmu(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_ghostwrite(unsigned int stage,
+				    unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_GHOSTWRITE))
+		return false;
+
+	/*
+	 * target-c9xx cores report arch_id and impid as 0
+	 *
+	 * While ghostwrite may not affect all c9xx cores that implement
+	 * xtheadvector, there is no further granularity than c9xx. Assume
+	 * vulnerable for this entire class of processors when xtheadvector is
+	 * enabled.
+	 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage != RISCV_ALTERNATIVES_EARLY_BOOT)
+		return false;
+
+	ghostwrite_set_vulnerable();
+
+	return true;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -155,6 +181,8 @@ static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
 
+	errata_probe_ghostwrite(stage, archid, impid);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/bugs.h b/arch/riscv/include/asm/bugs.h
new file mode 100644
index 0000000000000..17ca0a9477307
--- /dev/null
+++ b/arch/riscv/include/asm/bugs.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Interface for managing mitigations for riscv vulnerabilities.
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#ifndef __ASM_BUGS_H
+#define __ASM_BUGS_H
+
+/* Watch out, ordering is important here. */
+enum mitigation_state {
+	UNAFFECTED,
+	MITIGATED,
+	VULNERABLE,
+};
+
+void ghostwrite_set_vulnerable(void);
+bool ghostwrite_enable_mitigation(void);
+enum mitigation_state ghostwrite_get_state(void);
+
+#endif /* __ASM_BUGS_H */
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 7c8a71a526a30..6e426ed7919a4 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -25,7 +25,8 @@
 #ifdef CONFIG_ERRATA_THEAD
 #define	ERRATA_THEAD_MAE 0
 #define	ERRATA_THEAD_PMU 1
-#define	ERRATA_THEAD_NUMBER 2
+#define	ERRATA_THEAD_GHOSTWRITE 2
+#define	ERRATA_THEAD_NUMBER 3
 #endif
 
 #ifdef __ASSEMBLY__
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 30db92672ada5..d73f04c6c5637 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -118,3 +118,5 @@ obj-$(CONFIG_COMPAT)		+= compat_vdso/
 obj-$(CONFIG_64BIT)		+= pi/
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)	+= acpi_numa.o
+
+obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o
diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c
new file mode 100644
index 0000000000000..3655fe7d678cd
--- /dev/null
+++ b/arch/riscv/kernel/bugs.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sprintf.h>
+
+#include <asm/bugs.h>
+#include <asm/vendor_extensions/thead.h>
+
+static enum mitigation_state ghostwrite_state;
+
+void ghostwrite_set_vulnerable(void)
+{
+	ghostwrite_state = VULNERABLE;
+}
+
+/*
+ * Vendor extension alternatives will use the value set at the time of boot
+ * alternative patching, thus this must be called before boot alternatives are
+ * patched (and after extension probing) to be effective.
+ *
+ * Returns true if mitigated, false otherwise.
+ */
+bool ghostwrite_enable_mitigation(void)
+{
+	if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) &&
+	    ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) {
+		disable_xtheadvector();
+		ghostwrite_state = MITIGATED;
+		return true;
+	}
+
+	return false;
+}
+
+enum mitigation_state ghostwrite_get_state(void)
+{
+	return ghostwrite_state;
+}
+
+ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) {
+		switch (ghostwrite_state) {
+		case UNAFFECTED:
+			return sprintf(buf, "Not affected\n");
+		case MITIGATED:
+			return sprintf(buf, "Mitigation: xtheadvector disabled\n");
+		case VULNERABLE:
+			fallthrough;
+		default:
+			return sprintf(buf, "Vulnerable\n");
+		}
+	} else {
+		return sprintf(buf, "Not affected\n");
+	}
+}
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ba6976132638c..35670c96b3830 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -17,6 +17,7 @@
 #include <linux/of.h>
 #include <asm/acpi.h>
 #include <asm/alternative.h>
+#include <asm/bugs.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/hwcap.h>
@@ -824,6 +825,7 @@ static int has_thead_homogeneous_vlenb(void)
 static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
 {
 	unsigned int cpu;
+	bool mitigated;
 
 	for_each_possible_cpu(cpu) {
 		unsigned long this_hwcap = 0;
@@ -874,7 +876,13 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
 		riscv_fill_vendor_ext_list(cpu);
 	}
 
-	if (has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
+	/*
+	 * Execute ghostwrite mitigation immediately after detecting extensions
+	 * to disable xtheadvector if necessary.
+	 */
+	mitigated = ghostwrite_enable_mitigation();
+
+	if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
 		pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
 		disable_xtheadvector();
 	}
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index fdaa24bb641a0..a7e5118498758 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -599,6 +599,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed);
 CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow);
 CPU_SHOW_VULN_FALLBACK(gds);
 CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling);
+CPU_SHOW_VULN_FALLBACK(ghostwrite);
 
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -614,6 +615,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
 static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
 static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
 static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL);
+static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
@@ -630,6 +632,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_spec_rstack_overflow.attr,
 	&dev_attr_gather_data_sampling.attr,
 	&dev_attr_reg_file_data_sampling.attr,
+	&dev_attr_ghostwrite.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index bdcec17324452..6a0a8f1c7c903 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -77,6 +77,7 @@ extern ssize_t cpu_show_gds(struct device *dev,
 			    struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
 					       struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,

From b6de116e4636e1a45e5ee69264a66cfab721e581 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui@bytedance.com>
Date: Tue, 23 Jul 2024 10:18:20 +0800
Subject: [PATCH 130/368] riscv/mm/fault: add show_pte() before die()

When the kernel displays "Unable to handle kernel paging request at
virtual address", we would like to confirm the status of the virtual
address in the page table. So add show_pte() before die().

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20240723021820.87718-1-cuiyunhui@bytedance.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/fault.c | 52 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index a9f2b4af8f3f1..0194324a0c506 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -22,6 +22,57 @@
 
 #include "../kernel/head.h"
 
+static void show_pte(unsigned long addr)
+{
+	pgd_t *pgdp, pgd;
+	p4d_t *p4dp, p4d;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
+	pte_t *ptep, pte;
+	struct mm_struct *mm = current->mm;
+
+	if (!mm)
+		mm = &init_mm;
+
+	pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n",
+		 current->comm, PAGE_SIZE / SZ_1K, VA_BITS,
+		 mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd));
+
+	pgdp = pgd_offset(mm, addr);
+	pgd = pgdp_get(pgdp);
+	pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd));
+	if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd))
+		goto out;
+
+	p4dp = p4d_offset(pgdp, addr);
+	p4d = p4dp_get(p4dp);
+	pr_cont(", p4d=%016lx", p4d_val(p4d));
+	if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d))
+		goto out;
+
+	pudp = pud_offset(p4dp, addr);
+	pud = pudp_get(pudp);
+	pr_cont(", pud=%016lx", pud_val(pud));
+	if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud))
+		goto out;
+
+	pmdp = pmd_offset(pudp, addr);
+	pmd = pmdp_get(pmdp);
+	pr_cont(", pmd=%016lx", pmd_val(pmd));
+	if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd))
+		goto out;
+
+	ptep = pte_offset_map(pmdp, addr);
+	if (!ptep)
+		goto out;
+
+	pte = ptep_get(ptep);
+	pr_cont(", pte=%016lx", pte_val(pte));
+	pte_unmap(ptep);
+out:
+	pr_cont("\n");
+}
+
 static void die_kernel_fault(const char *msg, unsigned long addr,
 		struct pt_regs *regs)
 {
@@ -31,6 +82,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
 		addr);
 
 	bust_spinlocks(0);
+	show_pte(addr);
 	die(regs, "Oops");
 	make_task_dead(SIGKILL);
 }

From 69146a8c893f734cefaac0af6f917f894f29077e Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Mon, 20 Jan 2025 12:38:24 +0800
Subject: [PATCH 131/368] ubi: ubi_get_ec_info: Fix compiling error 'cast
 specifies array type'

On the RISC-V platform, there is a type conversion for the return value
(unsigned long type) of __untagged_addr_remote() in function
untagged_addr(). The compiler will complain when the parameter 'addr'
is an array type:
  arch/riscv/include/asm/uaccess.h:33:9: error: cast specifies array type
  (__force  __typeof__(addr))__untagged_addr_remote(current->mm, __addr)

Fix it by converting the input parameter as a pointer.

Fixes: 01099f635a4c ("ubi: Implement ioctl for detailed erase counters")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501191405.WYnmdL0U-lkp@intel.com/
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/cdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 4c3e4edb68532..b700a0efaa931 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -851,7 +851,8 @@ static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user
 		end_peb = ubi->peb_count;
 
 	/* Check access rights before filling erase_counters array */
-	if (!access_ok(ureq->erase_counters, (end_peb-req.start) * sizeof(int32_t)))
+	if (!access_ok((void __user *)ureq->erase_counters,
+		       (end_peb-req.start) * sizeof(int32_t)))
 		return -EFAULT;
 
 	/* Fill erase counter array */

From d12ca6d4c31bf974ecc80e36761488f41d05d18b Mon Sep 17 00:00:00 2001
From: Shengjiu Wang <shengjiu.wang@nxp.com>
Date: Mon, 20 Jan 2025 16:19:37 +0800
Subject: [PATCH 132/368] ASoC: fsl_asrc_m2m: only handle pairs for m2m in the
 suspend

ASRC memory to memory cases and memory to peripheral cases share the
same pair pool, so the pairs seen by the m2m suspend function may be
in use for memory to peripheral, which is handled by the memory to
peripheral driver and can't be handled in the memory to memory
suspend function.

Use the "pair->dma_buffer" as a flag for memory to memory case,
when it is allocated, handle the suspend operation for the related
pairs.

Fixes: 24a01710f627 ("ASoC: fsl_asrc_m2m: Add memory to memory function")
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20250120081938.2501554-2-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/fsl_asrc_m2m.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/fsl/fsl_asrc_m2m.c b/sound/soc/fsl/fsl_asrc_m2m.c
index 4906843e2a8fd..ab9033ccb01e1 100644
--- a/sound/soc/fsl/fsl_asrc_m2m.c
+++ b/sound/soc/fsl/fsl_asrc_m2m.c
@@ -633,7 +633,7 @@ int fsl_asrc_m2m_suspend(struct fsl_asrc *asrc)
 
 	for (i = 0; i < PAIR_CTX_NUM; i++) {
 		pair = asrc->pair[i];
-		if (!pair)
+		if (!pair || !pair->dma_buffer[IN].area || !pair->dma_buffer[OUT].area)
 			continue;
 		if (!completion_done(&pair->complete[IN])) {
 			if (pair->dma_chan[IN])

From abe01a78bfc8be9cc025a73b991c4e77431de9de Mon Sep 17 00:00:00 2001
From: Shengjiu Wang <shengjiu.wang@nxp.com>
Date: Mon, 20 Jan 2025 16:19:38 +0800
Subject: [PATCH 133/368] ASoC: fsl_asrc_m2m: return error value in
 asrc_m2m_device_run()

The asrc_m2m_device_run() function is the main processing function of
the conversion; its errors need to be returned to the user so that the
user can handle error cases properly.

Fixes: 24a01710f627 ("ASoC: fsl_asrc_m2m: Add memory to memory function")
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20250120081938.2501554-3-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/fsl_asrc_m2m.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/sound/soc/fsl/fsl_asrc_m2m.c b/sound/soc/fsl/fsl_asrc_m2m.c
index ab9033ccb01e1..f46881f71e430 100644
--- a/sound/soc/fsl/fsl_asrc_m2m.c
+++ b/sound/soc/fsl/fsl_asrc_m2m.c
@@ -183,7 +183,7 @@ static int asrc_dmaconfig(struct fsl_asrc_pair *pair,
 }
 
 /* main function of converter */
-static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task)
+static int asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task)
 {
 	struct fsl_asrc *asrc = pair->asrc;
 	struct device *dev = &asrc->pdev->dev;
@@ -193,7 +193,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas
 	unsigned int out_dma_len;
 	unsigned int width;
 	u32 fifo_addr;
-	int ret;
+	int ret = 0;
 
 	/* set ratio mod */
 	if (asrc->m2m_set_ratio_mod) {
@@ -215,6 +215,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas
 	    in_buf_len > ASRC_M2M_BUFFER_SIZE ||
 	    in_buf_len % (width * pair->channels / 8)) {
 		dev_err(dev, "out buffer size is error: [%d]\n", in_buf_len);
+		ret = -EINVAL;
 		goto end;
 	}
 
@@ -245,6 +246,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas
 		}
 	} else if (out_dma_len > ASRC_M2M_BUFFER_SIZE) {
 		dev_err(dev, "cap buffer size error\n");
+		ret = -EINVAL;
 		goto end;
 	}
 
@@ -263,12 +265,14 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas
 
 	if (!wait_for_completion_interruptible_timeout(&pair->complete[IN], 10 * HZ)) {
 		dev_err(dev, "out DMA task timeout\n");
+		ret = -ETIMEDOUT;
 		goto end;
 	}
 
 	if (out_dma_len > 0) {
 		if (!wait_for_completion_interruptible_timeout(&pair->complete[OUT], 10 * HZ)) {
 			dev_err(dev, "cap DMA task timeout\n");
+			ret = -ETIMEDOUT;
 			goto end;
 		}
 	}
@@ -278,7 +282,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas
 	/* update payload length for capture */
 	task->output_size = out_dma_len;
 end:
-	return;
+	return ret;
 }
 
 static int fsl_asrc_m2m_comp_open(struct snd_compr_stream *stream)
@@ -525,9 +529,7 @@ static int fsl_asrc_m2m_comp_task_start(struct snd_compr_stream *stream,
 	struct snd_compr_runtime *runtime = stream->runtime;
 	struct fsl_asrc_pair *pair = runtime->private_data;
 
-	asrc_m2m_device_run(pair, task);
-
-	return 0;
+	return asrc_m2m_device_run(pair, task);
 }
 
 static int fsl_asrc_m2m_comp_task_stop(struct snd_compr_stream *stream,

From da8146ce615ad49ca4d873c1028b1b6fb0bba910 Mon Sep 17 00:00:00 2001
From: Zhang Yi <zhangyi@everest-semi.com>
Date: Mon, 20 Jan 2025 18:17:58 +0800
Subject: [PATCH 134/368] ASoC: codecs: ES8326: Improved PSRR

Modified configuration to improve PSRR when ES8326 is working

Signed-off-by: Zhang Yi <zhangyi@everest-semi.com>
Link: https://patch.msgid.link/20250120101758.13347-1-zhangyi@everest-semi.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/es8326.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index a5603b6176889..34f6eda30e198 100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -896,7 +896,7 @@ static void es8326_jack_detect_handler(struct work_struct *work)
 			regmap_write(es8326->regmap, ES8326_INT_SOURCE,
 					(ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
 			regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f);
-			regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08);
+			regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x0d);
 			queue_delayed_work(system_wq, &es8326->jack_detect_work,
 					msecs_to_jiffies(400));
 			es8326->hp = 1;
@@ -1008,7 +1008,7 @@ static void es8326_init(struct snd_soc_component *component)
 	struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
 
 	regmap_write(es8326->regmap, ES8326_RESET, 0x1f);
-	regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
+	regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x3E);
 	regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
 	usleep_range(10000, 15000);
 	regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9);

From 425b753645767049f1a3eab02f39120c02156b72 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:34:22 +0100
Subject: [PATCH 135/368] cpuidle: teo: Rearrange idle state lookup code

Rearrange code in the idle state lookup loop in teo_select() to make it
somewhat easier to follow and update comments around it.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/4619938.LvFx2qVVIh@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 34 +++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 173ddcac540ad..68af712f70647 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -367,7 +367,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * If the sum of the intercepts metric for all of the idle states
 	 * shallower than the current candidate one (idx) is greater than the
 	 * sum of the intercepts and hits metrics for the candidate state and
-	 * all of the deeper states a shallower idle state is likely to be a
+	 * all of the deeper states, a shallower idle state is likely to be a
 	 * better choice.
 	 */
 	prev_intercept_idx = idx;
@@ -396,30 +396,36 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				 * first enabled state that is deep enough.
 				 */
 				if (teo_state_ok(i, drv) &&
-				    !dev->states_usage[i].disable)
+				    !dev->states_usage[i].disable) {
 					idx = i;
-				else
-					idx = first_suitable_idx;
-
+					break;
+				}
+				idx = first_suitable_idx;
 				break;
 			}
 
 			if (dev->states_usage[i].disable)
 				continue;
 
-			if (!teo_state_ok(i, drv)) {
+			if (teo_state_ok(i, drv)) {
 				/*
-				 * The current state is too shallow, but if an
-				 * alternative candidate state has been found,
-				 * it may still turn out to be a better choice.
+				 * The current state is deep enough, but still
+				 * there may be a better one.
 				 */
-				if (first_suitable_idx != idx)
-					continue;
-
-				break;
+				first_suitable_idx = i;
+				continue;
 			}
 
-			first_suitable_idx = i;
+			/*
+			 * The current state is too shallow, so if no suitable
+			 * states other than the initial candidate have been
+			 * found, give up (the remaining states to check are
+			 * shallower still), but otherwise the first suitable
+			 * state other than the initial candidate may turn out
+			 * to be preferable.
+			 */
+			if (first_suitable_idx == idx)
+				break;
 		}
 	}
 	if (!idx && prev_intercept_idx) {

From 92ce5c07b7a1913246fd5492aee52db8a0c66f55 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:36:57 +0100
Subject: [PATCH 136/368] cpuidle: teo: Reorder candidate state index checks

Since constraint_idx may be 0, the candidate state index may change to 0
after assigning constraint_idx to it, so first check if it is greater
than constraint_idx (and update it if so) and then check it against 0.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/1907276.tdWV9SEqCh@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 68af712f70647..30e444c9c40b0 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -428,6 +428,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				break;
 		}
 	}
+
+	/*
+	 * If there is a latency constraint, it may be necessary to select an
+	 * idle state shallower than the current candidate one.
+	 */
+	if (idx > constraint_idx)
+		idx = constraint_idx;
+
 	if (!idx && prev_intercept_idx) {
 		/*
 		 * We have to query the sleep length here otherwise we don't
@@ -438,13 +446,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		goto out_tick;
 	}
 
-	/*
-	 * If there is a latency constraint, it may be necessary to select an
-	 * idle state shallower than the current candidate one.
-	 */
-	if (idx > constraint_idx)
-		idx = constraint_idx;
-
 	/*
 	 * Skip the timers check if state 0 is the current candidate one,
 	 * because an immediate non-timer wakeup is expected in that case.

From ea185406d1ed90493ef0868a03ddcb6b2701b11b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:39:00 +0100
Subject: [PATCH 137/368] cpuidle: teo: Combine candidate state index checks
 against 0

There are two candidate state index checks against 0 in teo_select()
that need not be separate, so combine them and update comments around
them.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/13676346.uLZWGnKmhe@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 30e444c9c40b0..bd2fe41b42873 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -436,23 +436,18 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (idx > constraint_idx)
 		idx = constraint_idx;
 
-	if (!idx && prev_intercept_idx) {
-		/*
-		 * We have to query the sleep length here otherwise we don't
-		 * know after wakeup if our guess was correct.
-		 */
-		duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-		cpu_data->sleep_length_ns = duration_ns;
+	if (!idx) {
+		if (prev_intercept_idx) {
+			/*
+			 * Query the sleep length to be able to count the wakeup
+			 * as a hit if it is caused by a timer.
+			 */
+			duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+			cpu_data->sleep_length_ns = duration_ns;
+		}
 		goto out_tick;
 	}
 
-	/*
-	 * Skip the timers check if state 0 is the current candidate one,
-	 * because an immediate non-timer wakeup is expected in that case.
-	 */
-	if (!idx)
-		goto out_tick;
-
 	/*
 	 * If state 0 is a polling one, check if the target residency of
 	 * the current candidate state is low enough and skip the timers

From b9a6af26bd83fb23d15c53b1ce63df77dda15513 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:40:49 +0100
Subject: [PATCH 138/368] cpuidle: teo: Drop local variable prev_intercept_idx

Local variable prev_intercept_idx in teo_select() is redundant because
it cannot be 0 when candidate state index is 0.

The prev_intercept_idx value is the index of the deepest enabled idle
state, so if it is 0, state 0 is the deepest enabled idle state, in
which case it must be the only enabled idle state, but then teo_select()
would have returned early before initializing prev_intercept_idx.

Thus prev_intercept_idx must be nonzero and the check of it against 0
always passes, so it can be dropped altogether.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/3327997.aeNJFYEL58@rjwysocki.net
[ rjw: Fixed typo in the changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index bd2fe41b42873..95d76c8e0d129 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -292,7 +292,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	unsigned int hit_sum = 0;
 	int constraint_idx = 0;
 	int idx0 = 0, idx = -1;
-	int prev_intercept_idx;
 	s64 duration_ns;
 	int i;
 
@@ -370,7 +369,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * all of the deeper states, a shallower idle state is likely to be a
 	 * better choice.
 	 */
-	prev_intercept_idx = idx;
 	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
 		int first_suitable_idx = idx;
 
@@ -437,14 +435,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		idx = constraint_idx;
 
 	if (!idx) {
-		if (prev_intercept_idx) {
-			/*
-			 * Query the sleep length to be able to count the wakeup
-			 * as a hit if it is caused by a timer.
-			 */
-			duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-			cpu_data->sleep_length_ns = duration_ns;
-		}
+		/*
+		 * Query the sleep length to be able to count the wakeup as a
+		 * hit if it is caused by a timer.
+		 */
+		cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
 		goto out_tick;
 	}
 

From e24f8a55de509ba26726f094e084d90428cbcf26 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:41:55 +0100
Subject: [PATCH 139/368] cpuidle: teo: Clarify two code comments

Rewrite two code comments supposed to explain its behavior that are too
concise or not sufficiently clear.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/8472971.T7Z3S40VBb@rjwysocki.net
[ rjw: Fixed 2 typos in new comments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 95d76c8e0d129..411b315081f8b 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -154,9 +154,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
 		/*
-		 * One of the safety nets has triggered or the wakeup was close
-		 * enough to the closest timer event expected at the idle state
-		 * selection time to be discarded.
+		 * This causes the wakeup to be counted as a hit regardless of
+		 * the real idle duration which doesn't need to be computed
+		 * because the wakeup has been close enough to an anticipated
+		 * timer.
 		 */
 		measured_ns = U64_MAX;
 	} else {
@@ -302,8 +303,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	cpu_data->time_span_ns = local_clock();
 	/*
-	 * Set the expected sleep length to infinity in case of an early
-	 * return.
+	 * Set the sleep length to infinity in case the invocation of
+	 * tick_nohz_get_sleep_length() below is skipped, in which case it won't
+	 * be known whether or not the subsequent wakeup is caused by a timer.
+	 * It is generally fine to count the wakeup as an intercept then, except
+	 * for the cases when the CPU is mostly woken up by timers and there may
+	 * be opportunities to ask for a deeper idle state when no imminent
+	 * timers are scheduled which may be missed.
 	 */
 	cpu_data->sleep_length_ns = KTIME_MAX;
 

From d619b5cc678024fa5ed7eb3702c3991a2aa96823 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:45:50 +0100
Subject: [PATCH 140/368] cpuidle: teo: Simplify counting events used for tick
 management

Replace the tick_hits metric with a new tick_intercepts one that can be
used directly when deciding whether or not to stop the scheduler tick
and update the governor functional description accordingly.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/1987985.PYKUYFuaPT@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 49 ++++++++++-----------------------
 1 file changed, 14 insertions(+), 35 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 411b315081f8b..62f323f2e245c 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -41,11 +41,7 @@
  * idle state 2, the third bin spans from the target residency of idle state 2
  * up to, but not including, the target residency of idle state 3 and so on.
  * The last bin spans from the target residency of the deepest idle state
- * supplied by the driver to the scheduler tick period length or to infinity if
- * the tick period length is less than the target residency of that state.  In
- * the latter case, the governor also counts events with the measured idle
- * duration between the tick period length and the target residency of the
- * deepest idle state.
+ * supplied by the driver to infinity.
  *
  * Two metrics called "hits" and "intercepts" are associated with each bin.
  * They are updated every time before selecting an idle state for the given CPU
@@ -60,6 +56,10 @@
  * into by the sleep length (these events are also referred to as "intercepts"
  * below).
  *
+ * The governor also counts "intercepts" with the measured idle duration below
+ * the tick period length and uses this information when deciding whether or not
+ * to stop the scheduler tick.
+ *
  * In order to select an idle state for a CPU, the governor takes the following
  * steps (modulo the possible latency constraint that must be taken into account
  * too):
@@ -128,14 +128,14 @@ struct teo_bin {
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @state_bins: Idle state data bins for this CPU.
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
- * @tick_hits: Number of "hits" after TICK_NSEC.
+ * @tick_intercepts: "Intercepts" before TICK_NSEC.
  */
 struct teo_cpu {
 	s64 time_span_ns;
 	s64 sleep_length_ns;
 	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
 	unsigned int total;
-	unsigned int tick_hits;
+	unsigned int tick_intercepts;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -207,38 +207,21 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		}
 	}
 
-	/*
-	 * If the deepest state's target residency is below the tick length,
-	 * make a record of it to help teo_select() decide whether or not
-	 * to stop the tick.  This effectively adds an extra hits-only bin
-	 * beyond the last state-related one.
-	 */
-	if (target_residency_ns < TICK_NSEC) {
-		cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
-
-		cpu_data->total += cpu_data->tick_hits;
-
-		if (TICK_NSEC <= cpu_data->sleep_length_ns) {
-			idx_timer = drv->state_count;
-			if (TICK_NSEC <= measured_ns) {
-				cpu_data->tick_hits += PULSE;
-				goto end;
-			}
-		}
-	}
-
+	cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
 	 * Otherwise, update the "intercepts" metric for the bin fallen into by
 	 * the measured idle duration.
 	 */
-	if (idx_timer == idx_duration)
+	if (idx_timer == idx_duration) {
 		cpu_data->state_bins[idx_timer].hits += PULSE;
-	else
+	} else {
 		cpu_data->state_bins[idx_duration].intercepts += PULSE;
+		if (TICK_NSEC <= measured_ns)
+			cpu_data->tick_intercepts += PULSE;
+	}
 
-end:
 	cpu_data->total += PULSE;
 }
 
@@ -286,7 +269,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	ktime_t delta_tick = TICK_NSEC / 2;
-	unsigned int tick_intercept_sum = 0;
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
 	unsigned int idx_hit_sum = 0;
@@ -365,9 +347,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		goto end;
 	}
 
-	tick_intercept_sum = intercept_sum +
-			cpu_data->state_bins[drv->state_count-1].intercepts;
-
 	/*
 	 * If the sum of the intercepts metric for all of the idle states
 	 * shallower than the current candidate one (idx) is greater than the
@@ -477,7 +456,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * total wakeup events, do not stop the tick.
 	 */
 	if (drv->states[idx].target_residency_ns < TICK_NSEC &&
-	    tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+	    cpu_data->tick_intercepts > cpu_data->total / 2 + cpu_data->total / 8)
 		duration_ns = TICK_NSEC / 2;
 
 end:

From 13ed5c4a6d9c91755227b7b0fab7b2543f6adfd2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:48:47 +0100
Subject: [PATCH 141/368] cpuidle: teo: Skip getting the sleep length if
 wakeups are very frequent

Commit 6da8f9ba5a87 ("cpuidle: teo: Skip tick_nohz_get_sleep_length()
call in some cases") attempted to reduce the governor overhead in some
cases by making it avoid obtaining the sleep length (the time till the
next timer event) which may be costly.

Among other things, after the above commit, tick_nohz_get_sleep_length()
was not called any more when idle state 0 was to be returned, which
turned out to be problematic and the previous behavior in that respect
was restored by commit 4b20b07ce72f ("cpuidle: teo: Don't count non-
existent intercepts").

However, commit 6da8f9ba5a87 also caused the governor to avoid calling
tick_nohz_get_sleep_length() on systems where idle state 0 is a "polling"
one (that is, it is not really an idle state, but a loop continuously
executed by the CPU) when the target residency of the idle state to be
returned was low enough, so there was no practical need to refine the
idle state selection in any way.  This change was not removed by the
other commit, so now on systems where idle state 0 is a "polling" one,
tick_nohz_get_sleep_length() is called when idle state 0 is to be
returned, but it is not called when a deeper idle state with
sufficiently low target residency is to be returned.  That is arguably
confusing and inconsistent.

Moreover, there is no specific reason why the behavior in question
should depend on whether or not idle state 0 is a "polling" one.

One way to address this would be to make the governor always call
tick_nohz_get_sleep_length() to obtain the sleep length, but that would
effectively mean reverting commit 6da8f9ba5a87 and restoring the latency
issue that was the reason for doing it.  This approach is thus not
particularly attractive.

To address it differently, notice that if a CPU is woken up very often,
this is not likely to be caused by timers in the first place (user space
has a default timer slack of 50 us and there are relatively few timers
with a deadline shorter than several microseconds in the kernel) and
even if it were the case, the potential benefit from using a deep idle
state would then be questionable for latency reasons.  Therefore, if the
majority of CPU wakeups occur within several microseconds, it can be
assumed that all wakeups in that range are non-timer and the sleep
length need not be determined.

Accordingly, introduce a new metric for counting wakeups with the
measured idle duration below RESIDENCY_THRESHOLD_NS and modify the idle
state selection to skip the tick_nohz_get_sleep_length() invocation if
idle state 0 has been selected or the target residency of the candidate
idle state is below RESIDENCY_THRESHOLD_NS and the value of the new
metric is at least 1/2 of the total event count.

Since the above requires the measured idle duration to be determined
every time, except for the cases when one of the safety nets has
triggered (in which the wakeup is counted as a hit in the residency
range of the deepest idle state), update the handling of those cases
to avoid skipping the idle duration computation when the CPU wakeup
is "genuine".

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/3851791.kQq0lBPeGt@rjwysocki.net
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
[ rjw: Renamed a struct field ]
[ rjw: Fixed typo in the subject and one in a comment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 58 ++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 62f323f2e245c..d772a3b7ccbd7 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -129,6 +129,7 @@ struct teo_bin {
  * @state_bins: Idle state data bins for this CPU.
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @tick_intercepts: "Intercepts" before TICK_NSEC.
+ * @short_idles: Wakeups after short idle periods.
  */
 struct teo_cpu {
 	s64 time_span_ns;
@@ -136,6 +137,7 @@ struct teo_cpu {
 	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
 	unsigned int total;
 	unsigned int tick_intercepts;
+	unsigned int short_idles;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -152,12 +154,12 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	s64 target_residency_ns;
 	u64 measured_ns;
 
-	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
+	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
+
+	if (cpu_data->time_span_ns < 0) {
 		/*
-		 * This causes the wakeup to be counted as a hit regardless of
-		 * the real idle duration which doesn't need to be computed
-		 * because the wakeup has been close enough to an anticipated
-		 * timer.
+		 * If one of the safety nets has triggered, assume that this
+		 * might have been a long sleep.
 		 */
 		measured_ns = U64_MAX;
 	} else {
@@ -177,10 +179,14 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * time, so take 1/2 of the exit latency as a very rough
 		 * approximation of the average of it.
 		 */
-		if (measured_ns >= lat_ns)
+		if (measured_ns >= lat_ns) {
 			measured_ns -= lat_ns / 2;
-		else
+			if (measured_ns < RESIDENCY_THRESHOLD_NS)
+				cpu_data->short_idles += PULSE;
+		} else {
 			measured_ns /= 2;
+			cpu_data->short_idles += PULSE;
+		}
 	}
 
 	cpu_data->total = 0;
@@ -419,27 +425,35 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (idx > constraint_idx)
 		idx = constraint_idx;
 
-	if (!idx) {
-		/*
-		 * Query the sleep length to be able to count the wakeup as a
-		 * hit if it is caused by a timer.
-		 */
-		cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
-		goto out_tick;
-	}
-
 	/*
-	 * If state 0 is a polling one, check if the target residency of
-	 * the current candidate state is low enough and skip the timers
-	 * check in that case too.
+	 * If either the candidate state is state 0 or its target residency is
+	 * low enough, there is basically nothing more to do, but if the sleep
+	 * length is not updated, the subsequent wakeup will be counted as an
+	 * "intercept" which may be problematic in the cases when timer wakeups
+	 * are dominant.  Namely, it may effectively prevent deeper idle states
+	 * from being selected at one point even if no imminent timers are
+	 * scheduled.
+	 *
+	 * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
+	 * CPU are unlikely (user space has a default 50 us slack value for
+	 * hrtimers and there are relatively few timers with a lower deadline
+	 * value in the kernel), and even if they did happen, the potential
+	 * benefit from using a deep idle state in that case would be
+	 * questionable anyway for latency reasons.  Thus if the measured idle
+	 * duration falls into that range in the majority of cases, assume
+	 * non-timer wakeups to be dominant and skip updating the sleep length
+	 * to reduce latency.
 	 */
-	if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
-	    drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+	if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
+	    2 * cpu_data->short_idles >= cpu_data->total)
 		goto out_tick;
 
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 
+	if (!idx)
+		goto out_tick;
+
 	/*
 	 * If the closest expected timer is before the target residency of the
 	 * candidate state, a shallower one needs to be found.
@@ -501,7 +515,7 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
 	if (dev->poll_time_limit ||
 	    (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
 		dev->poll_time_limit = false;
-		cpu_data->time_span_ns = cpu_data->sleep_length_ns;
+		cpu_data->time_span_ns = KTIME_MIN;
 	} else {
 		cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
 	}

From ddcfa7964677b1298712edb931a98ac25ffd2fb6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:50:23 +0100
Subject: [PATCH 142/368] cpuidle: teo: Simplify handling of total events count

Instead of computing the total events count from scratch every time,
decay it and add a PULSE value to it in teo_update().

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/9388883.CDJkKcVGEf@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index d772a3b7ccbd7..600b54a9f1e11 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -189,8 +189,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		}
 	}
 
-	cpu_data->total = 0;
-
 	/*
 	 * Decay the "hits" and "intercepts" metrics for all of the bins and
 	 * find the bins that the sleep length and the measured idle duration
@@ -202,8 +200,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		bin->hits -= bin->hits >> DECAY_SHIFT;
 		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
 
-		cpu_data->total += bin->hits + bin->intercepts;
-
 		target_residency_ns = drv->states[i].target_residency_ns;
 
 		if (target_residency_ns <= cpu_data->sleep_length_ns) {
@@ -228,6 +224,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 			cpu_data->tick_intercepts += PULSE;
 	}
 
+	cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
 	cpu_data->total += PULSE;
 }
 

From 65e18e6544751ac25dc284794566ee90d65a379e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 13 Jan 2025 19:51:59 +0100
Subject: [PATCH 143/368] cpuidle: teo: Replace time_span_ns with a flag

After recent updates, the time_span_ns field in struct teo_cpu has
become an indicator of whether or not the most recent wakeup has been
"genuine", which may as well be indicated by a bool field without
calling local_clock(), so update the code accordingly.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/6010475.MhkbZ0Pkbq@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 600b54a9f1e11..c232c95ca7faa 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -124,20 +124,20 @@ struct teo_bin {
 
 /**
  * struct teo_cpu - CPU data used by the TEO cpuidle governor.
- * @time_span_ns: Time between idle state selection and post-wakeup update.
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @state_bins: Idle state data bins for this CPU.
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @tick_intercepts: "Intercepts" before TICK_NSEC.
  * @short_idles: Wakeups after short idle periods.
+ * @artificial_wakeup: Set if the wakeup has been triggered by a safety net.
  */
 struct teo_cpu {
-	s64 time_span_ns;
 	s64 sleep_length_ns;
 	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
 	unsigned int total;
 	unsigned int tick_intercepts;
 	unsigned int short_idles;
+	bool artificial_wakeup;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -156,7 +156,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
 
-	if (cpu_data->time_span_ns < 0) {
+	if (cpu_data->artificial_wakeup) {
 		/*
 		 * If one of the safety nets has triggered, assume that this
 		 * might have been a long sleep.
@@ -165,13 +165,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	} else {
 		u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
 
-		/*
-		 * The computations below are to determine whether or not the
-		 * (saved) time till the next timer event and the measured idle
-		 * duration fall into the same "bin", so use last_residency_ns
-		 * for that instead of time_span_ns which includes the cpuidle
-		 * overhead.
-		 */
 		measured_ns = dev->last_residency_ns;
 		/*
 		 * The delay between the wakeup and the first instruction
@@ -286,7 +279,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		dev->last_state_idx = -1;
 	}
 
-	cpu_data->time_span_ns = local_clock();
 	/*
 	 * Set the sleep length to infinity in case the invocation of
 	 * tick_nohz_get_sleep_length() below is skipped, in which case it won't
@@ -504,17 +496,16 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 
 	dev->last_state_idx = state;
-	/*
-	 * If the wakeup was not "natural", but triggered by one of the safety
-	 * nets, assume that the CPU might have been idle for the entire sleep
-	 * length time.
-	 */
 	if (dev->poll_time_limit ||
 	    (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
+		/*
+		 * The wakeup was not "genuine", but triggered by one of the
+		 * safety nets.
+		 */
 		dev->poll_time_limit = false;
-		cpu_data->time_span_ns = KTIME_MIN;
+		cpu_data->artificial_wakeup = true;
 	} else {
-		cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
+		cpu_data->artificial_wakeup = false;
 	}
 }
 

From 16c8d7586c196cddcc8822a946ef03c9cfabae30 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 20 Jan 2025 17:08:50 +0100
Subject: [PATCH 144/368] cpuidle: teo: Skip sleep length computation for low
 latency constraints

If the idle state exit latency constraint is sufficiently low, it
is better to avoid the additional latency related to calling
tick_nohz_get_sleep_length().  It is also not necessary to compute
the sleep length in that case because shallow idle state selection
will be forced then regardless of the recent wakeup history.

Accordingly, skip the sleep length computation and subsequent
checks if the exit latency constraint is low enough.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/6122398.lOV4Wx5bFT@rjwysocki.net
---
 drivers/cpuidle/governors/teo.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index c232c95ca7faa..8fe5e1b47ef90 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -105,6 +105,12 @@
 
 #include "gov.h"
 
+/*
+ * Idle state exit latency threshold used for deciding whether or not to check
+ * the time till the closest expected timer event.
+ */
+#define LATENCY_THRESHOLD_NS	(RESIDENCY_THRESHOLD_NS / 2)
+
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  * is used for decreasing metrics on a regular basis.
@@ -432,9 +438,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * duration falls into that range in the majority of cases, assume
 	 * non-timer wakeups to be dominant and skip updating the sleep length
 	 * to reduce latency.
+	 *
+	 * Also, if the latency constraint is sufficiently low, it will force
+	 * shallow idle states regardless of the wakeup type, so the sleep
+	 * length need not be known in that case.
 	 */
 	if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
-	    2 * cpu_data->short_idles >= cpu_data->total)
+	    (2 * cpu_data->short_idles >= cpu_data->total ||
+	     latency_req < LATENCY_THRESHOLD_NS))
 		goto out_tick;
 
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);

From 5719e2823565a304a2afb752d4792c622a693e22 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Wed, 15 Jan 2025 21:26:03 +0100
Subject: [PATCH 145/368] io_uring/rsrc: Simplify buffer cloning by locking
 both rings

The locking in the buffer cloning code is somewhat complex because it goes
back and forth between locking the source ring and the destination ring.

Make it easier to reason about by locking both rings at the same time.
To avoid ABBA deadlocks, lock the rings in ascending kernel address order,
just like in lock_two_nondirectories().

Signed-off-by: Jann Horn <jannh@google.com>
Link: https://lore.kernel.org/r/20250115-uring-clone-refactor-v2-1-7289ba50776d@google.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rsrc.c | 73 +++++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index e32ac58533914..a1c7c8db55455 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -921,6 +921,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
 	return 0;
 }
 
+/* Lock two rings at once. The rings must be different! */
+static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
+{
+	if (ctx1 > ctx2)
+		swap(ctx1, ctx2);
+	mutex_lock(&ctx1->uring_lock);
+	mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING);
+}
+
+/* Both rings are locked by the caller. */
 static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
 			    struct io_uring_clone_buffers *arg)
 {
@@ -928,6 +938,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	int i, ret, off, nr;
 	unsigned int nbufs;
 
+	lockdep_assert_held(&ctx->uring_lock);
+	lockdep_assert_held(&src_ctx->uring_lock);
+
 	/*
 	 * Accounting state is shared between the two rings; that only works if
 	 * both rings are accounted towards the same counters.
@@ -942,7 +955,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
 		return -EBUSY;
 
-	nbufs = READ_ONCE(src_ctx->buf_table.nr);
+	nbufs = src_ctx->buf_table.nr;
 	if (!arg->nr)
 		arg->nr = nbufs;
 	else if (arg->nr > nbufs)
@@ -966,27 +979,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		}
 	}
 
-	/*
-	 * Drop our own lock here. We'll setup the data we need and reference
-	 * the source buffers, then re-grab, check, and assign at the end.
-	 */
-	mutex_unlock(&ctx->uring_lock);
-
-	mutex_lock(&src_ctx->uring_lock);
 	ret = -ENXIO;
 	nbufs = src_ctx->buf_table.nr;
 	if (!nbufs)
-		goto out_unlock;
+		goto out_free;
 	ret = -EINVAL;
 	if (!arg->nr)
 		arg->nr = nbufs;
 	else if (arg->nr > nbufs)
-		goto out_unlock;
+		goto out_free;
 	ret = -EOVERFLOW;
 	if (check_add_overflow(arg->nr, arg->src_off, &off))
-		goto out_unlock;
+		goto out_free;
 	if (off > nbufs)
-		goto out_unlock;
+		goto out_free;
 
 	off = arg->dst_off;
 	i = arg->src_off;
@@ -1001,7 +1007,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 			dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 			if (!dst_node) {
 				ret = -ENOMEM;
-				goto out_unlock;
+				goto out_free;
 			}
 
 			refcount_inc(&src_node->buf->refs);
@@ -1011,10 +1017,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		i++;
 	}
 
-	/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
-	mutex_unlock(&src_ctx->uring_lock);
-	mutex_lock(&ctx->uring_lock);
-
 	/*
 	 * If asked for replace, put the old table. data->nodes[] holds both
 	 * old and new nodes at this point.
@@ -1023,24 +1025,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		io_rsrc_data_free(ctx, &ctx->buf_table);
 
 	/*
-	 * ctx->buf_table should be empty now - either the contents are being
-	 * replaced and we just freed the table, or someone raced setting up
-	 * a buffer table while the clone was happening. If not empty, fall
-	 * through to failure handling.
+	 * ctx->buf_table must be empty now - either the contents are being
+	 * replaced and we just freed the table, or the contents are being
+	 * copied to a ring that does not have buffers yet (checked at function
+	 * entry).
 	 */
-	if (!ctx->buf_table.nr) {
-		ctx->buf_table = data;
-		return 0;
-	}
+	WARN_ON_ONCE(ctx->buf_table.nr);
+	ctx->buf_table = data;
+	return 0;
 
-	mutex_unlock(&ctx->uring_lock);
-	mutex_lock(&src_ctx->uring_lock);
-	/* someone raced setting up buffers, dump ours */
-	ret = -EBUSY;
-out_unlock:
+out_free:
 	io_rsrc_data_free(ctx, &data);
-	mutex_unlock(&src_ctx->uring_lock);
-	mutex_lock(&ctx->uring_lock);
 	return ret;
 }
 
@@ -1054,6 +1049,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
 {
 	struct io_uring_clone_buffers buf;
+	struct io_ring_ctx *src_ctx;
 	bool registered_src;
 	struct file *file;
 	int ret;
@@ -1071,7 +1067,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
 	file = io_uring_register_get_file(buf.src_fd, registered_src);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
-	ret = io_clone_buffers(ctx, file->private_data, &buf);
+
+	src_ctx = file->private_data;
+	if (src_ctx != ctx) {
+		mutex_unlock(&ctx->uring_lock);
+		lock_two_rings(ctx, src_ctx);
+	}
+
+	ret = io_clone_buffers(ctx, src_ctx, &buf);
+
+	if (src_ctx != ctx)
+		mutex_unlock(&src_ctx->uring_lock);
+
 	if (!registered_src)
 		fput(file);
 	return ret;

From bb2d76344bc80933a462aa84581d1258e0dc758b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 16 Jan 2025 02:53:26 +0000
Subject: [PATCH 146/368] io_uring: clean up io_uring_register_get_file()

Make it always reference the returned file. It's safer, especially with
unregistrations happening under it. And it makes the api cleaner with no
conditional clean ups by the caller.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0d0b13a63e8edd6b5d360fc821dcdb035cb6b7e0.1736995897.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/register.c | 6 ++++--
 io_uring/rsrc.c     | 3 +--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/io_uring/register.c b/io_uring/register.c
index 05025047d1dab..0db181437ae33 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -853,6 +853,8 @@ struct file *io_uring_register_get_file(unsigned int fd, bool registered)
 			return ERR_PTR(-EINVAL);
 		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
 		file = tctx->registered_rings[fd];
+		if (file)
+			get_file(file);
 	} else {
 		file = fget(fd);
 	}
@@ -919,7 +921,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
 				ctx->buf_table.nr, ret);
 	mutex_unlock(&ctx->uring_lock);
-	if (!use_registered_ring)
-		fput(file);
+
+	fput(file);
 	return ret;
 }
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index a1c7c8db55455..b5e47030764e5 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1079,7 +1079,6 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
 	if (src_ctx != ctx)
 		mutex_unlock(&src_ctx->uring_lock);
 
-	if (!registered_src)
-		fput(file);
+	fput(file);
 	return ret;
 }

From b73de0da50129d790975bb8a9893b421cc38bc24 Mon Sep 17 00:00:00 2001
From: Sidong Yang <sidong.yang@furiosa.ai>
Date: Wed, 15 Jan 2025 14:20:31 +0000
Subject: [PATCH 147/368] io_uring/rsrc: remove unused parameter ctx for
 io_rsrc_node_alloc()

The io_ring_ctx parameter of io_rsrc_node_alloc() is currently unused.
Remove the parameter and update the callers accordingly.

Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai>
Link: https://lore.kernel.org/r/20250115142033.658599-1-sidong.yang@furiosa.ai
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/filetable.c |  2 +-
 io_uring/rsrc.c      | 10 +++++-----
 io_uring/rsrc.h      |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index a21660e3145ab..dd8eeec97acf6 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 	if (slot_index >= ctx->file_table.data.nr)
 		return -EINVAL;
 
-	node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+	node = io_rsrc_node_alloc(IORING_RSRC_FILE);
 	if (!node)
 		return -ENOMEM;
 
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index b5e47030764e5..a5fc035af8ff9 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -118,7 +118,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 	}
 }
 
-struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
+struct io_rsrc_node *io_rsrc_node_alloc(int type)
 {
 	struct io_rsrc_node *node;
 
@@ -203,7 +203,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				err = -EBADF;
 				break;
 			}
-			node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+			node = io_rsrc_node_alloc(IORING_RSRC_FILE);
 			if (!node) {
 				err = -ENOMEM;
 				fput(file);
@@ -525,7 +525,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			goto fail;
 		}
 		ret = -ENOMEM;
-		node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+		node = io_rsrc_node_alloc(IORING_RSRC_FILE);
 		if (!node) {
 			fput(file);
 			goto fail;
@@ -730,7 +730,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	if (!iov->iov_base)
 		return NULL;
 
-	node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
+	node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
 	if (!node)
 		return ERR_PTR(-ENOMEM);
 	node->buf = NULL;
@@ -1004,7 +1004,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		if (!src_node) {
 			dst_node = NULL;
 		} else {
-			dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
+			dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
 			if (!dst_node) {
 				ret = -ENOMEM;
 				goto out_free;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index c8b0935844618..5cd00b7baef8b 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -43,7 +43,7 @@ struct io_imu_folio_data {
 	unsigned int	nr_folios;
 };
 
-struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
+struct io_rsrc_node *io_rsrc_node_alloc(int type);
 void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
 void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
 int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);

From 2839ab71ac9009884fe41a7422a167a64716c0a7 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 20 Jan 2025 17:21:57 +0100
Subject: [PATCH 148/368] io_uring/rsrc: Move lockdep assert from
 io_free_rsrc_node() to caller

Checking for lockdep_assert_held(&ctx->uring_lock) in io_free_rsrc_node()
means that the assertion is only checked when the resource drops to zero
references.
Move the lockdep assertion up into the caller io_put_rsrc_node() so that it
instead happens on every reference count decrement.

Signed-off-by: Jann Horn <jannh@google.com>
Link: https://lore.kernel.org/r/20250120-uring-lockdep-assert-earlier-v1-1-68d8e071a4bb@google.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rsrc.c | 2 --
 io_uring/rsrc.h | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index a5fc035af8ff9..af39b69eb4fde 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -444,8 +444,6 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 
 void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 {
-	lockdep_assert_held(&ctx->uring_lock);
-
 	if (node->tag)
 		io_post_aux_cqe(ctx, node->tag, 0, 0);
 
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 5cd00b7baef8b..190f7ee45de93 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -2,6 +2,8 @@
 #ifndef IOU_RSRC_H
 #define IOU_RSRC_H
 
+#include <linux/lockdep.h>
+
 #define IO_NODE_ALLOC_CACHE_MAX 32
 
 #define IO_RSRC_TAG_TABLE_SHIFT	(PAGE_SHIFT - 3)
@@ -80,6 +82,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data
 
 static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 {
+	lockdep_assert_held(&ctx->uring_lock);
 	if (node && !--node->refs)
 		io_free_rsrc_node(ctx, node);
 }

From 5323186e2e8d33c073fad51e24f18e2d6dbae2da Mon Sep 17 00:00:00 2001
From: Detlev Casanova <detlev.casanova@collabora.com>
Date: Fri, 17 Jan 2025 11:31:02 -0500
Subject: [PATCH 149/368] ASoC: rockchip: i2s_tdm: Re-add the set_sysclk
 callback

In commit
9e2ab4b18ebd ("ASoC: rockchip: i2s-tdm: Fix inaccurate sampling rates"),
the set_sysclk callback was removed because it was considered unused, since
the mclk rate can be set in the hw_params callback.
The difference between hw_params and set_sysclk is that the former is
called with the audio sampling rate set in the params (e.g.: 48000 Hz)
while the latter is called with a clock rate already computed with
  sampling_rate * mclk-fs (e.g.: 48000 * 256)

For HDMI audio using the Rockchip I2S TDM driver, the mclk-fs value must
be set to 128 instead of the default 256, and that value is set in the
device tree at the machine driver level (like a simple-audio-card
compatible node).
Therefore, the i2s_tdm driver has no idea that another mclk-fs value can
be configured and simply computes the mclk rate in the hw_params callback
with DEFAULT_MCLK_FS * params_rate(params), which is wrong for HDMI
audio.

Re-add the set_sysclk callback so that the mclk rate is computed by the
machine driver which has the correct mclk-fs value set in its device tree
node.

Fixes: 9e2ab4b18ebd ("ASoC: rockchip: i2s-tdm: Fix inaccurate sampling rates")
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://patch.msgid.link/20250117163102.65807-1-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/rockchip/rockchip_i2s_tdm.c | 31 +++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
index bd0dc586e24a3..7f5fcaecee4b6 100644
--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
+++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
@@ -22,7 +22,6 @@
 
 #define DRV_NAME "rockchip-i2s-tdm"
 
-#define DEFAULT_MCLK_FS				256
 #define CH_GRP_MAX				4  /* The max channel 8 / 2 */
 #define MULTIPLEX_CH_MAX			10
 
@@ -70,6 +69,8 @@ struct rk_i2s_tdm_dev {
 	bool has_playback;
 	bool has_capture;
 	struct snd_soc_dai_driver *dai;
+	unsigned int mclk_rx_freq;
+	unsigned int mclk_tx_freq;
 };
 
 static int to_ch_num(unsigned int val)
@@ -617,6 +618,27 @@ static int rockchip_i2s_trcm_mode(struct snd_pcm_substream *substream,
 	return 0;
 }
 
+static int rockchip_i2s_tdm_set_sysclk(struct snd_soc_dai *cpu_dai, int stream,
+				       unsigned int freq, int dir)
+{
+	struct rk_i2s_tdm_dev *i2s_tdm = to_info(cpu_dai);
+
+	if (i2s_tdm->clk_trcm) {
+		i2s_tdm->mclk_tx_freq = freq;
+		i2s_tdm->mclk_rx_freq = freq;
+	} else {
+		if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+			i2s_tdm->mclk_tx_freq = freq;
+		else
+			i2s_tdm->mclk_rx_freq = freq;
+	}
+
+	dev_dbg(i2s_tdm->dev, "The target mclk_%s freq is: %d\n",
+		stream ? "rx" : "tx", freq);
+
+	return 0;
+}
+
 static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream,
 				      struct snd_pcm_hw_params *params,
 				      struct snd_soc_dai *dai)
@@ -631,15 +653,19 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream,
 
 		if (i2s_tdm->clk_trcm == TRCM_TX) {
 			mclk = i2s_tdm->mclk_tx;
+			mclk_rate = i2s_tdm->mclk_tx_freq;
 		} else if (i2s_tdm->clk_trcm == TRCM_RX) {
 			mclk = i2s_tdm->mclk_rx;
+			mclk_rate = i2s_tdm->mclk_rx_freq;
 		} else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 			mclk = i2s_tdm->mclk_tx;
+			mclk_rate = i2s_tdm->mclk_tx_freq;
 		} else {
 			mclk = i2s_tdm->mclk_rx;
+			mclk_rate = i2s_tdm->mclk_rx_freq;
 		}
 
-		err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params));
+		err = clk_set_rate(mclk, mclk_rate);
 		if (err)
 			return err;
 
@@ -799,6 +825,7 @@ static const struct snd_soc_dai_ops rockchip_i2s_tdm_dai_ops = {
 	.hw_params = rockchip_i2s_tdm_hw_params,
 	.set_bclk_ratio	= rockchip_i2s_tdm_set_bclk_ratio,
 	.set_fmt = rockchip_i2s_tdm_set_fmt,
+	.set_sysclk = rockchip_i2s_tdm_set_sysclk,
 	.set_tdm_slot = rockchip_dai_tdm_slot,
 	.trigger = rockchip_i2s_tdm_trigger,
 };

From dec6b006f4cc13968d75ed28673ca4e3633de96b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@denx.de>
Date: Tue, 21 Jan 2025 12:57:47 -0300
Subject: [PATCH 150/368] ASoC: dt-bindings: ti,pcm1681: Fix the binding title

The PCM1681 is an 8-channel Digital-to-Analog Converter, so fix it
accordingly.

Signed-off-by: Fabio Estevam <festevam@denx.de>
Link: https://patch.msgid.link/20250121155747.3740995-1-festevam@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/ti,pcm1681.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml
index 5aa00617291c9..1f0e6787a7464 100644
--- a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml
+++ b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/ti,pcm1681.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments PCM1681 8-channel PWM Processor
+title: Texas Instruments PCM1681 8-channel Digital-to-Analog Converter
 
 maintainers:
   - Shenghao Ding <shenghao-ding@ti.com>

From b76b3ee5573fd6ff8761d82feb74d707eb2139ef Mon Sep 17 00:00:00 2001
From: Alexander Boehm <aboehm@eurofunk.com>
Date: Wed, 22 Jan 2025 10:29:27 +0200
Subject: [PATCH 151/368] ASoC: SOF: imx8m: add SAI2,5,6,7

Added the remaining SAIs in addition to SAI1 and SAI3. There is no SAI4.

Signed-off-by: Alexander Boehm <aboehm@eurofunk.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.dev>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Link: https://patch.msgid.link/20250122082928.1321536-1-daniel.baluta@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/imx/imx8m.c | 44 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c
index ff42743efa791..4ab5814e9117e 100644
--- a/sound/soc/sof/imx/imx8m.c
+++ b/sound/soc/sof/imx/imx8m.c
@@ -294,6 +294,17 @@ static struct snd_soc_dai_driver imx8m_dai[] = {
 		.channels_max = 32,
 	},
 },
+{
+	.name = "sai2",
+	.playback = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+	.capture = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+},
 {
 	.name = "sai3",
 	.playback = {
@@ -305,6 +316,39 @@ static struct snd_soc_dai_driver imx8m_dai[] = {
 		.channels_max = 32,
 	},
 },
+{
+	.name = "sai5",
+	.playback = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+	.capture = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+},
+{
+	.name = "sai6",
+	.playback = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+	.capture = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+},
+{
+	.name = "sai7",
+	.playback = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+	.capture = {
+		.channels_min = 1,
+		.channels_max = 32,
+	},
+},
 {
 	.name = "micfil",
 	.capture = {

From e935f903ab9bee43f3375883c230a32138ae3d1d Mon Sep 17 00:00:00 2001
From: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Date: Tue, 21 Jan 2025 08:48:15 +0200
Subject: [PATCH 152/368] ASoC: audio-graph-card2: use correct endpoint when
 getting link parameters

When link DT nodes are parsed, most functions get port as a parameter,
which results in port endpoint@0 always being used. However, each endpoint
might have different settings, but those are currently ignored.

Fix that by passing endpoint instead of port when parsing link parameters.

Signed-off-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://patch.msgid.link/20250121064815.741820-1-ivo.g.dimitrov.75@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/generic/audio-graph-card2.c | 62 +++++++++++++--------------
 1 file changed, 29 insertions(+), 33 deletions(-)

diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c
index c36b1a2ac949e..ee94b256b7707 100644
--- a/sound/soc/generic/audio-graph-card2.c
+++ b/sound/soc/generic/audio-graph-card2.c
@@ -648,23 +648,23 @@ static int graph_parse_node_multi(struct simple_util_priv *priv,
 
 static int graph_parse_node_single(struct simple_util_priv *priv,
 				   enum graph_type gtype,
-				   struct device_node *port,
+				   struct device_node *ep,
 				   struct link_info *li, int is_cpu)
 {
-	struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(port, NULL);
-
 	return __graph_parse_node(priv, gtype, ep, li, is_cpu, 0);
 }
 
 static int graph_parse_node(struct simple_util_priv *priv,
 			    enum graph_type gtype,
-			    struct device_node *port,
+			    struct device_node *ep,
 			    struct link_info *li, int is_cpu)
 {
+	struct device_node *port __free(device_node) = ep_to_port(ep);
+
 	if (graph_lnk_is_multi(port))
 		return graph_parse_node_multi(priv, gtype, port, li, is_cpu);
 	else
-		return graph_parse_node_single(priv, gtype, port, li, is_cpu);
+		return graph_parse_node_single(priv, gtype, ep, li, is_cpu);
 }
 
 static void graph_parse_daifmt(struct device_node *node, unsigned int *daifmt)
@@ -722,14 +722,15 @@ static unsigned int graph_parse_bitframe(struct device_node *ep)
 
 static void graph_link_init(struct simple_util_priv *priv,
 			    struct device_node *lnk,
-			    struct device_node *port_cpu,
-			    struct device_node *port_codec,
+			    struct device_node *ep_cpu,
+			    struct device_node *ep_codec,
 			    struct link_info *li,
 			    int is_cpu_node)
 {
 	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
 	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
-	struct device_node *ep_cpu, *ep_codec;
+	struct device_node *port_cpu = ep_to_port(ep_cpu);
+	struct device_node *port_codec = ep_to_port(ep_codec);
 	struct device_node *multi_cpu_port = NULL, *multi_codec_port = NULL;
 	struct snd_soc_dai_link_component *dlc;
 	unsigned int daifmt = 0;
@@ -739,25 +740,23 @@ static void graph_link_init(struct simple_util_priv *priv,
 	int multi_cpu_port_idx = 1, multi_codec_port_idx = 1;
 	int i;
 
-	of_node_get(port_cpu);
 	if (graph_lnk_is_multi(port_cpu)) {
 		multi_cpu_port = port_cpu;
 		ep_cpu = graph_get_next_multi_ep(&multi_cpu_port, multi_cpu_port_idx++);
 		of_node_put(port_cpu);
 		port_cpu = ep_to_port(ep_cpu);
 	} else {
-		ep_cpu = of_graph_get_next_port_endpoint(port_cpu, NULL);
+		of_node_get(ep_cpu);
 	}
 	struct device_node *ports_cpu __free(device_node) = port_to_ports(port_cpu);
 
-	of_node_get(port_codec);
 	if (graph_lnk_is_multi(port_codec)) {
 		multi_codec_port = port_codec;
 		ep_codec = graph_get_next_multi_ep(&multi_codec_port, multi_codec_port_idx++);
 		of_node_put(port_codec);
 		port_codec = ep_to_port(ep_codec);
 	} else {
-		ep_codec = of_graph_get_next_port_endpoint(port_codec, NULL);
+		of_node_get(ep_codec);
 	}
 	struct device_node *ports_codec __free(device_node) = port_to_ports(port_codec);
 
@@ -833,7 +832,7 @@ int audio_graph2_link_normal(struct simple_util_priv *priv,
 {
 	struct device_node *cpu_port = lnk;
 	struct device_node *cpu_ep	__free(device_node) = of_graph_get_next_port_endpoint(cpu_port, NULL);
-	struct device_node *codec_port	__free(device_node) = of_graph_get_remote_port(cpu_ep);
+	struct device_node *codec_ep	__free(device_node) = of_graph_get_remote_endpoint(cpu_ep);
 	int ret;
 
 	/*
@@ -841,18 +840,18 @@ int audio_graph2_link_normal(struct simple_util_priv *priv,
 	 * see
 	 *	__graph_parse_node() :: DAI Naming
 	 */
-	ret = graph_parse_node(priv, GRAPH_NORMAL, codec_port, li, 0);
+	ret = graph_parse_node(priv, GRAPH_NORMAL, codec_ep, li, 0);
 	if (ret < 0)
 		return ret;
 
 	/*
 	 * call CPU, and set DAI Name
 	 */
-	ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_port, li, 1);
+	ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_ep, li, 1);
 	if (ret < 0)
 		return ret;
 
-	graph_link_init(priv, lnk, cpu_port, codec_port, li, 1);
+	graph_link_init(priv, lnk, cpu_ep, codec_ep, li, 1);
 
 	return ret;
 }
@@ -864,15 +863,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv,
 {
 	struct device_node *ep	__free(device_node) = of_graph_get_next_port_endpoint(lnk, NULL);
 	struct device_node *rep	__free(device_node) = of_graph_get_remote_endpoint(ep);
-	struct device_node *cpu_port = NULL;
-	struct device_node *codec_port = NULL;
+	struct device_node *cpu_ep = NULL;
+	struct device_node *codec_ep = NULL;
 	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
 	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
 	int is_cpu = graph_util_is_ports0(lnk);
 	int ret;
 
 	if (is_cpu) {
-		cpu_port = of_graph_get_remote_port(ep); /* rport */
+		cpu_ep = rep;
 
 		/*
 		 * dpcm {
@@ -901,12 +900,12 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv,
 		dai_link->dynamic		= 1;
 		dai_link->dpcm_merged_format	= 1;
 
-		ret = graph_parse_node(priv, GRAPH_DPCM, cpu_port, li, 1);
+		ret = graph_parse_node(priv, GRAPH_DPCM, cpu_ep, li, 1);
 		if (ret)
-			goto err;
+			return ret;
 
 	} else {
-		codec_port = of_graph_get_remote_port(ep); /* rport */
+		codec_ep = rep;
 
 		/*
 		 * dpcm {
@@ -937,18 +936,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv,
 		dai_link->no_pcm		= 1;
 		dai_link->be_hw_params_fixup	= simple_util_be_hw_params_fixup;
 
-		ret = graph_parse_node(priv, GRAPH_DPCM, codec_port, li, 0);
+		ret = graph_parse_node(priv, GRAPH_DPCM, codec_ep, li, 0);
 		if (ret < 0)
-			goto err;
+			return ret;
 	}
 
 	graph_parse_convert(ep,  dai_props); /* at node of <dpcm> */
 	graph_parse_convert(rep, dai_props); /* at node of <CPU/Codec> */
 
-	graph_link_init(priv, lnk, cpu_port, codec_port, li, is_cpu);
-err:
-	of_node_put(cpu_port);
-	of_node_put(codec_port);
+	graph_link_init(priv, lnk, cpu_ep, codec_ep, li, is_cpu);
 
 	return ret;
 }
@@ -1013,26 +1009,26 @@ int audio_graph2_link_c2c(struct simple_util_priv *priv,
 	struct device_node *ep0 __free(device_node) = of_graph_get_next_port_endpoint(port0, NULL);
 	struct device_node *ep1 __free(device_node) = of_graph_get_next_port_endpoint(port1, NULL);
 
-	struct device_node *codec0_port __free(device_node) = of_graph_get_remote_port(ep0);
-	struct device_node *codec1_port __free(device_node) = of_graph_get_remote_port(ep1);
+	struct device_node *codec0_ep __free(device_node) = of_graph_get_remote_endpoint(ep0);
+	struct device_node *codec1_ep __free(device_node) = of_graph_get_remote_endpoint(ep1);
 
 	/*
 	 * call Codec first.
 	 * see
 	 *	__graph_parse_node() :: DAI Naming
 	 */
-	ret = graph_parse_node(priv, GRAPH_C2C, codec1_port, li, 0);
+	ret = graph_parse_node(priv, GRAPH_C2C, codec1_ep, li, 0);
 	if (ret < 0)
 		return ret;
 
 	/*
 	 * call CPU, and set DAI Name
 	 */
-	ret = graph_parse_node(priv, GRAPH_C2C, codec0_port, li, 1);
+	ret = graph_parse_node(priv, GRAPH_C2C, codec0_ep, li, 1);
 	if (ret < 0)
 		return ret;
 
-	graph_link_init(priv, lnk, codec0_port, codec1_port, li, 1);
+	graph_link_init(priv, lnk, codec0_ep, codec1_ep, li, 1);
 
 	return ret;
 }

From a2cd92185db0586f2136feae84d98cc54580f381 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Jan 2025 07:53:17 +0100
Subject: [PATCH 153/368] ASoC: fsl_asrc_m2m: select CONFIG_DMA_SHARED_BUFFER

Randconfig builds without dmabuf result in this link error from
the fsl-asrc driver:

ERROR: modpost: "dma_buf_put" [sound/core/snd-compress.ko] undefined!
ERROR: modpost: "dma_buf_export" [sound/soc/fsl/snd-soc-fsl-asrc.ko] undefined!

Add the missing 'select' statement.

Fixes: 24a01710f627 ("ASoC: fsl_asrc_m2m: Add memory to memory function")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20250122065330.1423248-1-arnd@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index e5fbf5305ea25..c4cf3cff58ded 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -6,6 +6,7 @@ comment "Common SoC Audio options for Freescale CPUs:"
 config SND_SOC_FSL_ASRC
 	tristate "Asynchronous Sample Rate Converter (ASRC) module support"
 	depends on HAS_DMA
+	select DMA_SHARED_BUFFER
 	select REGMAP_MMIO
 	select SND_SOC_GENERIC_DMAENGINE_PCM
 	select SND_COMPRESS_ACCEL

From 4b24c69af9cd5bd8fe98ab2ddd822d73f5e20a00 Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Wed, 22 Jan 2025 18:35:41 +0200
Subject: [PATCH 154/368] dt-bindings: arm: imx: Add board revisions for
 i.MX8MP, i.MX8QM and i.MX8QXP

The wm8960 codec is EOL, so the i.MX8MP EVK Rev B4, i.MX8QM MEK Rev D
and i.MX8QXP MEK WCPU boards use the wm8962 codec instead.
Therefore, add compatibles for them.

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20250122163544.1392869-2-daniel.baluta@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/arm/fsl.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 6e0dcf4307f10..df26b6b26e8c0 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -1091,6 +1091,7 @@ properties:
               - dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC
               - emcraft,imx8mp-navqp      # i.MX8MP Emcraft Systems NavQ+ Kit
               - fsl,imx8mp-evk            # i.MX8MP EVK Board
+              - fsl,imx8mp-evk-revb4      # i.MX8MP EVK Rev B4 Board
               - gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board
               - gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board
               - gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board
@@ -1262,6 +1263,7 @@ properties:
         items:
           - enum:
               - fsl,imx8qm-mek           # i.MX8QM MEK Board
+              - fsl,imx8qm-mek-revd      # i.MX8QM MEK Rev D Board
               - toradex,apalis-imx8      # Apalis iMX8 Modules
               - toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules
           - const: fsl,imx8qm
@@ -1290,6 +1292,7 @@ properties:
           - enum:
               - einfochips,imx8qxp-ai_ml  # i.MX8QXP AI_ML Board
               - fsl,imx8qxp-mek           # i.MX8QXP MEK Board
+              - fsl,imx8qxp-mek-wcpu      # i.MX8QXP MEK WCPU Board
           - const: fsl,imx8qxp
 
       - description: i.MX8DXL based Boards

From 66084793fac9c8b841f65da1809ad0ad398f9f2f Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@nxp.com>
Date: Wed, 22 Jan 2025 18:35:42 +0200
Subject: [PATCH 155/368] ASoC: SOF: imx: Add mach entry to select cs42888
 topology

After commit 2b9cdef13648 ("ASoC: SOF: imx: Add devicetree support
to select topologies") we select the topology to be used based on the
board compatible string in the dts.

Now that we have a way to know when the baseboard is installed, use
the board compatible and select proper topology files when the cs42888
Audio IO card is used.

Reviewed-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Reviewed-by: Laurentiu Mihalcea <laurentiu.mihalcea@nxp.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20250122163544.1392869-3-daniel.baluta@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/imx/imx8.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c
index 0b85b29d1067f..d6117a3d42660 100644
--- a/sound/soc/sof/imx/imx8.c
+++ b/sound/soc/sof/imx/imx8.c
@@ -611,6 +611,17 @@ static struct snd_sof_of_mach sof_imx8_machs[] = {
 		.sof_tplg_filename = "sof-imx8-wm8960.tplg",
 		.drv_name = "asoc-audio-graph-card2",
 	},
+	{
+		.compatible = "fsl,imx8qxp-mek-bb",
+		.sof_tplg_filename = "sof-imx8-cs42888.tplg",
+		.drv_name = "asoc-audio-graph-card2",
+	},
+	{
+		.compatible = "fsl,imx8qm-mek-bb",
+		.sof_tplg_filename = "sof-imx8-cs42888.tplg",
+		.drv_name = "asoc-audio-graph-card2",
+	},
+
 	{}
 };
 

From a9f54c7fbd2edb28c8d4d812be3d0129167f92d4 Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Wed, 22 Jan 2025 18:35:43 +0200
Subject: [PATCH 156/368] ASoC: SOF: imx8: Add entries for new 8QM and 8QXP
 revisions

The new revisions for 8QM and 8QXP have wm8962 codec instead of wm8960.
Therefore add new entries in sof_imx8_machs, an array of snd_sof_of_mach,
where we describe topology name and driver name.
For the new revisions we have new compatible values and based on these,
we select the new topology file, for wm8962 codec.

Reviewed-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Laurentiu Mihalcea <laurentiu.mihalcea@nxp.com>
Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://patch.msgid.link/20250122163544.1392869-4-daniel.baluta@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/imx/imx8.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c
index d6117a3d42660..3b418114e8d59 100644
--- a/sound/soc/sof/imx/imx8.c
+++ b/sound/soc/sof/imx/imx8.c
@@ -606,11 +606,21 @@ static struct snd_sof_of_mach sof_imx8_machs[] = {
 		.sof_tplg_filename = "sof-imx8-wm8960.tplg",
 		.drv_name = "asoc-audio-graph-card2",
 	},
+	{
+		.compatible = "fsl,imx8qxp-mek-wcpu",
+		.sof_tplg_filename = "sof-imx8-wm8962.tplg",
+		.drv_name = "asoc-audio-graph-card2",
+	},
 	{
 		.compatible = "fsl,imx8qm-mek",
 		.sof_tplg_filename = "sof-imx8-wm8960.tplg",
 		.drv_name = "asoc-audio-graph-card2",
 	},
+	{
+		.compatible = "fsl,imx8qm-mek-revd",
+		.sof_tplg_filename = "sof-imx8-wm8962.tplg",
+		.drv_name = "asoc-audio-graph-card2",
+	},
 	{
 		.compatible = "fsl,imx8qxp-mek-bb",
 		.sof_tplg_filename = "sof-imx8-cs42888.tplg",

From af65d7d041d486cc55530e14d806e16143037962 Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Wed, 22 Jan 2025 18:35:44 +0200
Subject: [PATCH 157/368] ASoC: SOF: imx8m: Add entry for new 8M Plus revision

The new revision for 8M Plus has wm8962 codec instead of wm8960.
Therefore add new entry in sof_imx8mp_machs, an array of snd_sof_of_mach,
where we describe topology name and driver name.
For the new revision we have new compatible value and based on this,
we select the new topology file, for wm8962 codec.

Reviewed-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Laurentiu Mihalcea <laurentiu.mihalcea@nxp.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Link: https://patch.msgid.link/20250122163544.1392869-5-daniel.baluta@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/imx/imx8m.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c
index ff42743efa791..1dee0b7a1f3d6 100644
--- a/sound/soc/sof/imx/imx8m.c
+++ b/sound/soc/sof/imx/imx8m.c
@@ -471,6 +471,11 @@ static const struct snd_sof_dsp_ops sof_imx8m_ops = {
 };
 
 static struct snd_sof_of_mach sof_imx8mp_machs[] = {
+	{
+		.compatible = "fsl,imx8mp-evk-revb4",
+		.sof_tplg_filename = "sof-imx8mp-wm8962.tplg",
+		.drv_name = "asoc-audio-graph-card2",
+	},
 	{
 		.compatible = "fsl,imx8mp-evk",
 		.sof_tplg_filename = "sof-imx8mp-wm8960.tplg",

From 69a62e03f896a7382671877b6ad6aab87c53e9c3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 22 Jan 2025 17:03:28 -0700
Subject: [PATCH 158/368] io_uring/msg_ring: don't leave potentially dangling
 ->tctx pointer

For remote posting of messages, req->tctx is assigned even though it
is never used. Rather than leave a dangling pointer, just clear it to
NULL and use the previous check for a valid submitter_task to gate on
whether or not the request should be terminated.

Reported-by: Jann Horn <jannh@google.com>
Fixes: b6f58a3f4aa8 ("io_uring: move struct io_kiocb from task_struct to io_uring_task")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/msg_ring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index bd3cd78d2dba3..7e6f68e911f10 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -89,8 +89,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			      int res, u32 cflags, u64 user_data)
 {
-	req->tctx = READ_ONCE(ctx->submitter_task->io_uring);
-	if (!req->tctx) {
+	if (!READ_ONCE(ctx->submitter_task)) {
 		kmem_cache_free(req_cachep, req);
 		return -EOWNERDEAD;
 	}
@@ -98,6 +97,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	io_req_set_res(req, res, cflags);
 	percpu_ref_get(&ctx->refs);
 	req->ctx = ctx;
+	req->tctx = NULL;
 	req->io_task_work.func = io_msg_tw_complete;
 	io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
 	return 0;

From 3fafa6a02be219ddd05d6201911534a34135cb82 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 20 Jan 2025 15:35:01 +0100
Subject: [PATCH 159/368] dt-bindings: interrupt-controller:
 microchip,lan966x-oic: Clarify endpoint use

Reword the description, to make it clear that the LAN966x Outbound
Interrupt Controller is used only in PCI endpoint mode.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Herve Codina <herve.codina@bootlin.com>
Link: https://lore.kernel.org/all/247b1185c93610100f3f8c9e0ab2c1506e53e1f4.1737383314.git.geert+renesas@glider.be
---
 .../bindings/interrupt-controller/microchip,lan966x-oic.yaml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml
index b2adc71741770..dca16e202da99 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml
@@ -14,9 +14,8 @@ allOf:
 
 description: |
   The Microchip LAN966x outband interrupt controller (OIC) maps the internal
-  interrupt sources of the LAN966x device to an external interrupt.
-  When the LAN966x device is used as a PCI device, the external interrupt is
-  routed to the PCI interrupt.
+  interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x
+  device is used as a PCI device.
 
 properties:
   compatible:

From e06c9e3682f58fbeb632b7b866bb4fe66a4a4b42 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 20 Jan 2025 15:35:02 +0100
Subject: [PATCH 160/368] irqchip/lan966x-oic: Make CONFIG_LAN966X_OIC depend
 on CONFIG_MCHP_LAN966X_PCI

The Microchip LAN966x outbound interrupt controller is only present on
Microchip LAN966x SoCs, and only used in PCI endpoint mode.  Hence add a
dependency on MCHP_LAN966X_PCI, to prevent asking the user about this
driver when configuring a kernel without Microchip LAN966x PCIe support.

Fixes: 3e3a7b35332924c8 ("irqchip: Add support for LAN966x OIC")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Herve Codina <herve.codina@bootlin.com>
Link: https://lore.kernel.org/all/28e8a605e72ee45e27f0d06b2b71366159a9c782.1737383314.git.geert+renesas@glider.be
---
 drivers/irqchip/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index be063bfb50c4b..c11b9965c4ad9 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -169,6 +169,7 @@ config IXP4XX_IRQ
 
 config LAN966X_OIC
 	tristate "Microchip LAN966x OIC Support"
+	depends on MCHP_LAN966X_PCI || COMPILE_TEST
 	select GENERIC_IRQ_CHIP
 	select IRQ_DOMAIN
 	help

From b9a8ea185f3f8024619b2e74b74375493c87df8c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 22 Jan 2025 20:49:13 -0600
Subject: [PATCH 161/368] ASoC: acp: Support microphone from Lenovo Go S

On Lenovo Go S there is a DMIC connected to the ACP but the firmware
has no `AcpDmicConnected` ACPI _DSD.

Add a DMI entry for all possible Lenovo Go S SKUs to enable DMIC.

Cc: nijs1@lenovo.com
Cc: pgriffais@valvesoftware.com
Cc: mpearson-lenovo@squebb.ca
Cc: stable@vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/20250123024915.2457115-1-superm1@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/yc/acp6x-mach.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index ecf57a6cb7c37..b16587d8f97a8 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -304,6 +304,34 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "83AS"),
 		}
 	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "83L3"),
+		}
+	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "83N6"),
+		}
+	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "83Q2"),
+		}
+	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"),
+		}
+	},
 	{
 		.driver_data = &acp6x_card,
 		.matches = {

From 87284832bb91259c3ba2a4a1fa5739dc7a45dcb7 Mon Sep 17 00:00:00 2001
From: Luoxi Li <lee.lockhey@gmail.com>
Date: Thu, 23 Jan 2025 11:39:37 +0800
Subject: [PATCH 162/368] ASoC: use to_platform_device() instead of
 container_of()

Use the to_platform_device() macro where possible.

Signed-off-by: Luoxi Li <lee.lockhey@gmail.com>
Link: https://patch.msgid.link/20250123033937.3587880-1-lee.lockhey@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/rt5514.c              | 3 +--
 sound/soc/sof/imx/imx8.c               | 3 +--
 sound/soc/sof/imx/imx8m.c              | 3 +--
 sound/soc/sof/imx/imx8ulp.c            | 3 +--
 sound/soc/sof/intel/bdw.c              | 3 +--
 sound/soc/sof/intel/byt.c              | 3 +--
 sound/soc/sof/mediatek/mt8186/mt8186.c | 2 +-
 sound/soc/sof/mediatek/mt8195/mt8195.c | 6 +++---
 8 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c
index 2b3c0f9e178cc..9cb74962161a0 100644
--- a/sound/soc/codecs/rt5514.c
+++ b/sound/soc/codecs/rt5514.c
@@ -1091,8 +1091,7 @@ static int rt5514_set_bias_level(struct snd_soc_component *component,
 static int rt5514_probe(struct snd_soc_component *component)
 {
 	struct rt5514_priv *rt5514 = snd_soc_component_get_drvdata(component);
-	struct platform_device *pdev = container_of(component->dev,
-						   struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(component->dev);
 
 	rt5514->mclk = devm_clk_get_optional(component->dev, "mclk");
 	if (IS_ERR(rt5514->mclk))
diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c
index 0b85b29d1067f..3f6080e6b8a1b 100644
--- a/sound/soc/sof/imx/imx8.c
+++ b/sound/soc/sof/imx/imx8.c
@@ -175,8 +175,7 @@ static int imx8_run(struct snd_sof_dev *sdev)
 
 static int imx8_probe(struct snd_sof_dev *sdev)
 {
-	struct platform_device *pdev =
-		container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *res_node;
 	struct resource *mmio;
diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c
index 4ab5814e9117e..8b60e55b106a7 100644
--- a/sound/soc/sof/imx/imx8m.c
+++ b/sound/soc/sof/imx/imx8m.c
@@ -144,8 +144,7 @@ static int imx8m_reset(struct snd_sof_dev *sdev)
 
 static int imx8m_probe(struct snd_sof_dev *sdev)
 {
-	struct platform_device *pdev =
-		container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *res_node;
 	struct resource *mmio;
diff --git a/sound/soc/sof/imx/imx8ulp.c b/sound/soc/sof/imx/imx8ulp.c
index 6965791ab6ef6..0704da27e69d9 100644
--- a/sound/soc/sof/imx/imx8ulp.c
+++ b/sound/soc/sof/imx/imx8ulp.c
@@ -155,8 +155,7 @@ static int imx8ulp_reset(struct snd_sof_dev *sdev)
 
 static int imx8ulp_probe(struct snd_sof_dev *sdev)
 {
-	struct platform_device *pdev =
-		container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *res_node;
 	struct resource *mmio;
diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c
index 5282c0071534d..e1f0e38c24076 100644
--- a/sound/soc/sof/intel/bdw.c
+++ b/sound/soc/sof/intel/bdw.c
@@ -410,8 +410,7 @@ static int bdw_probe(struct snd_sof_dev *sdev)
 {
 	struct snd_sof_pdata *pdata = sdev->pdata;
 	const struct sof_dev_desc *desc = pdata->desc;
-	struct platform_device *pdev =
-		container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	const struct sof_intel_dsp_desc *chip;
 	struct resource *mmio;
 	u32 base, size;
diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c
index 536d4c89d2f02..cae7dc0036c6a 100644
--- a/sound/soc/sof/intel/byt.c
+++ b/sound/soc/sof/intel/byt.c
@@ -109,8 +109,7 @@ static int byt_acpi_probe(struct snd_sof_dev *sdev)
 {
 	struct snd_sof_pdata *pdata = sdev->pdata;
 	const struct sof_dev_desc *desc = pdata->desc;
-	struct platform_device *pdev =
-		container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	const struct sof_intel_dsp_desc *chip;
 	struct resource *mmio;
 	u32 base, size;
diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c
index 9955dfa520ae2..31437fdd4e922 100644
--- a/sound/soc/sof/mediatek/mt8186/mt8186.c
+++ b/sound/soc/sof/mediatek/mt8186/mt8186.c
@@ -238,7 +238,7 @@ static int mt8186_run(struct snd_sof_dev *sdev)
 
 static int mt8186_dsp_probe(struct snd_sof_dev *sdev)
 {
-	struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	struct adsp_priv *priv;
 	int ret;
 
diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c
index 6032b566c6795..371563d7ce795 100644
--- a/sound/soc/sof/mediatek/mt8195/mt8195.c
+++ b/sound/soc/sof/mediatek/mt8195/mt8195.c
@@ -228,7 +228,7 @@ static int mt8195_run(struct snd_sof_dev *sdev)
 
 static int mt8195_dsp_probe(struct snd_sof_dev *sdev)
 {
-	struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	struct adsp_priv *priv;
 	int ret;
 
@@ -341,7 +341,7 @@ static int mt8195_dsp_shutdown(struct snd_sof_dev *sdev)
 
 static void mt8195_dsp_remove(struct snd_sof_dev *sdev)
 {
-	struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	struct adsp_priv *priv = sdev->pdata->hw_pdata;
 
 	platform_device_unregister(priv->ipc_dev);
@@ -351,7 +351,7 @@ static void mt8195_dsp_remove(struct snd_sof_dev *sdev)
 
 static int mt8195_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state)
 {
-	struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev);
+	struct platform_device *pdev = to_platform_device(sdev->dev);
 	int ret;
 	u32 reset_sw, dbg_pc;
 

From 4a32a38cb68f55ff9e100df348ddb3d4b3e50643 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Date: Thu, 23 Jan 2025 14:10:36 +0200
Subject: [PATCH 163/368] ASoC: da7213: Initialize the mutex

Initialize the struct da7213_priv::ctrl_lock mutex. Without it the
following stack trace is displayed when rebooting and lockdep is enabled:

DEBUG_LOCKS_WARN_ON(lock->magic != lock)
WARNING: CPU: 0 PID: 180 at kernel/locking/mutex.c:564 __mutex_lock+0x254/0x4e4
CPU: 0 UID: 0 PID: 180 Comm: alsactl Not tainted 6.13.0-next-20250123-arm64-renesas-00002-g132083a22d3d #30
Hardware name: Renesas SMARC EVK version 2 based on r9a08g045s33 (DT)
pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : __mutex_lock+0x254/0x4e4
lr : __mutex_lock+0x254/0x4e4
sp : ffff800082c13c00
x29: ffff800082c13c00 x28: ffff00001002b500 x27: 0000000000000000
x26: 0000000000000000 x25: ffff800080b30db4 x24: 0000000000000002
x23: ffff800082c13c70 x22: 0000ffffc2a68a70 x21: ffff000010348000
x20: 0000000000000000 x19: ffff00000be2e488 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
x14: 00000000000003c1 x13: 00000000000003c1 x12: 0000000000000000
x11: 0000000000000011 x10: 0000000000001420 x9 : ffff800082c13a70
x8 : 0000000000000001 x7 : ffff800082c13a50 x6 : ffff800082c139e0
x5 : ffff800082c14000 x4 : ffff800082c13a50 x3 : 0000000000000000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff00001002b500
Call trace:
  __mutex_lock+0x254/0x4e4 (P)
  mutex_lock_nested+0x20/0x28
  da7213_volsw_locked_get+0x34/0x60
  snd_ctl_elem_read+0xbc/0x114
  snd_ctl_ioctl+0x878/0xa70
  __arm64_sys_ioctl+0x94/0xc8
  invoke_syscall+0x44/0x104
  el0_svc_common.constprop.0+0xb4/0xd4
  do_el0_svc+0x18/0x20
  el0_svc+0x3c/0xf0
  el0t_64_sync_handler+0xc0/0xc4
  el0t_64_sync+0x154/0x158
 irq event stamp: 7713
 hardirqs last  enabled at (7713): [<ffff800080170d94>] ktime_get_coarse_real_ts64+0xf0/0x10c
 hardirqs last disabled at (7712): [<ffff800080170d58>] ktime_get_coarse_real_ts64+0xb4/0x10c
 softirqs last  enabled at (7550): [<ffff8000800179d4>] fpsimd_restore_current_state+0x30/0xb8
 softirqs last disabled at (7548): [<ffff8000800179a8>] fpsimd_restore_current_state+0x4/0xb8
 ---[ end trace 0000000000000000 ]---

Fixes: 64c3259b5f86 ("ASoC: da7213: Add new kcontrol for tonegen")
Cc: stable@vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Link: https://patch.msgid.link/20250123121036.70406-1-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/da7213.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index ca4cc954efa8e..eb97ac73ec062 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -2203,6 +2203,8 @@ static int da7213_i2c_probe(struct i2c_client *i2c)
 		return ret;
 	}
 
+	mutex_init(&da7213->ctrl_lock);
+
 	pm_runtime_set_autosuspend_delay(&i2c->dev, 100);
 	pm_runtime_use_autosuspend(&i2c->dev);
 	pm_runtime_set_active(&i2c->dev);

From b13ee668e8280ca5b07f8ce2846b9957a8a10853 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 23 Jan 2025 06:18:41 -0700
Subject: [PATCH 164/368] block: don't revert iter for -EIOCBQUEUED

blkdev_read_iter() has a few odd checks, like gating the position and
count adjustment on whether or not the result is bigger-than-or-equal to
zero (where bigger than makes more sense), and not checking the return
value of blkdev_direct_IO() before doing an iov_iter_revert(). The
latter can lead to attempting to revert with a negative value, which
when passed to iov_iter_revert() as an unsigned value will lead to
throwing a WARN_ON() because unroll is bigger than MAX_RW_COUNT.

Be sane and don't revert for -EIOCBQUEUED, like what is done in other
spots.

Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/fops.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index 6d5c4fc5a2168..be9f1dbea9ce0 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -783,11 +783,12 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		file_accessed(iocb->ki_filp);
 
 		ret = blkdev_direct_IO(iocb, to);
-		if (ret >= 0) {
+		if (ret > 0) {
 			iocb->ki_pos += ret;
 			count -= ret;
 		}
-		iov_iter_revert(to, count - iov_iter_count(to));
+		if (ret != -EIOCBQUEUED)
+			iov_iter_revert(to, count - iov_iter_count(to));
 		if (ret < 0 || !count)
 			goto reexpand;
 	}

From a9ae6fe1c319c4776c2b11e85e15109cd3f04076 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <wagi@kernel.org>
Date: Thu, 23 Jan 2025 14:08:29 +0100
Subject: [PATCH 165/368] blk-mq: create correct map for fallback case

The fallback code in blk_mq_map_hw_queues originates from
blk_mq_pci_map_queues and was added to handle the case where
pci_irq_get_affinity returns NULL for !SMP configurations.

Besides blk_mq_pci_map_queues, blk_mq_map_hw_queues also replaces
blk_mq_virtio_map_queues, which used to use blk_mq_map_queues for the
fallback.

It's possible to use blk_mq_map_queues for both cases though.
blk_mq_map_queues creates the same map as blk_mq_clear_mq_map for !SMP,
that is, CPU 0 will be mapped to hctx 0.

The WARN_ON_ONCE has to be dropped for virtio as the fallback is also
taken for certain configurations by default. There is still a WARN_ON
check in lib/group_cpus.c, though:

       WARN_ON(nr_present + nr_others < numgrps);

which will trigger if the caller tries to create more hardware queues
than CPUs. It tests the same condition as the WARN_ON_ONCE in
blk_mq_pci_map_queues did.

Fixes: a5665c3d150c ("virtio: blk/scsi: replace blk_mq_virtio_map_queues with blk_mq_map_hw_queues")
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Closes: https://lore.kernel.org/all/20250122093020.6e8a4e5b@gandalf.local.home/
Signed-off-by: Daniel Wagner <wagi@kernel.org>
Link: https://lore.kernel.org/r/20250123-fix-blk_mq_map_hw_queues-v1-1-08dbd01f2c39@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-cpumap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index ad8d6a363f24a..444798c5374f4 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -87,7 +87,6 @@ void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
 	return;
 
 fallback:
-	WARN_ON_ONCE(qmap->nr_queues > 1);
-	blk_mq_clear_mq_map(qmap);
+	blk_mq_map_queues(qmap);
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues);

From 6917192378c1ce17ba31df51c4e0d8b1c97a453b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 23 Jan 2025 14:25:07 +0100
Subject: [PATCH 166/368] ASoC: Intel: bytcr_rt5640: Add DMI quirk for Vexia
 Edu Atla 10 tablet 5V

The Vexia EDU ATLA 10 tablet comes in 2 different versions with
significantly different mainboards. The only outward difference is that
the charging barrel on one is marked 5V and the other is marked 9V.

The 5V version mostly works with the BYTCR defaults, except that it is
missing a CHAN package in its ACPI tables and the default of using
SSP0-AIF2 is wrong; SSP0-AIF1 must be used instead. In addition, its
jack detect signal is not inverted as it usually is.

Add a DMI quirk for the 5V version to fix sound not working.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patch.msgid.link/20250123132507.18434-1-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 9caa4407c1ca3..6446cda0f8572 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -1132,7 +1132,22 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
 					BYT_RT5640_SSP0_AIF2 |
 					BYT_RT5640_MCLK_EN),
 	},
-	{	/* Vexia Edu Atla 10 tablet */
+	{
+		/* Vexia Edu Atla 10 tablet 5V version */
+		.matches = {
+			/* Having all 3 of these not set is somewhat unique */
+			DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."),
+			DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."),
+			/* Above strings are too generic, also match on BIOS date */
+			DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_JD_NOT_INV |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Vexia Edu Atla 10 tablet 9V version */
 		.matches = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
 			DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),

From d58d82bd0efd6c8edd452fc2f6c6dd052ec57cb2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 22 Jan 2025 17:29:31 -0700
Subject: [PATCH 167/368] io_uring/uring_cmd: use cached cmd_op in
 io_uring_cmd_sock()

io_uring_cmd_sock() does a normal read of cmd->sqe->cmd_op, where it
really should be using a READ_ONCE() as ->sqe may still be pointing to
the original SQE. Since the prep side already does this READ_ONCE() and
stores it locally, use that value rather than re-read it.

Fixes: 8e9fad0e70b7b ("io_uring: Add io_uring command support for sockets")
Link: https://lore.kernel.org/r/20250121-uring-sockcmd-fix-v1-1-add742802a29@google.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index fc94c465a9850..3993c9339ac76 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -350,7 +350,7 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	if (!prot || !prot->ioctl)
 		return -EOPNOTSUPP;
 
-	switch (cmd->sqe->cmd_op) {
+	switch (cmd->cmd_op) {
 	case SOCKET_URING_OP_SIOCINQ:
 		ret = prot->ioctl(sk, SIOCINQ, &arg);
 		if (ret)

From eaf72f7b414f5944585e7dee9c915c7f8f7f6344 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 22 Jan 2025 19:50:31 -0700
Subject: [PATCH 168/368] io_uring/uring_cmd: cleanup struct io_uring_cmd_data
 layout

A few spots in uring_cmd assume that the SQEs copied are always at the
start of the structure, and hence mix req->async_data and the struct
itself.

Clean that up and use the proper indices.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 3993c9339ac76..6a63ec4b54456 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -192,8 +192,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 		return 0;
 	}
 
-	memcpy(req->async_data, sqe, uring_sqe_size(req->ctx));
-	ioucmd->sqe = req->async_data;
+	memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx));
+	ioucmd->sqe = cache->sqes;
 	return 0;
 }
 
@@ -260,7 +260,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 		struct io_uring_cmd_data *cache = req->async_data;
 
 		if (ioucmd->sqe != (void *) cache)
-			memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
+			memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
 		return -EAGAIN;
 	} else if (ret == -EIOCBQUEUED) {
 		return -EIOCBQUEUED;

From fa3595523d72d13508befd28cf2ca642cafc69f7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 22 Jan 2025 20:00:57 -0700
Subject: [PATCH 169/368] io_uring: get rid of alloc cache init_once handling

init_once is called when an object doesn't come from the cache, and
hence needs initial clearing of certain members. While the whole
struct could get cleared by memset() in that case, a few of the cache
members are large enough that this may cause unnecessary overhead if
the caches used aren't large enough to satisfy the workload. For those
cases, some churn of kmalloc+kfree is to be expected.

Ensure that the 3 users that need clearing put the members they need
cleared at the start of the struct, and wrap the rest of the struct in
a struct group so the offset is known.

While at it, improve the interaction with KASAN such that when/if
KASAN writes to members inside the struct that should be retained over
caching, it won't trip over itself. For rw and net, the retaining of
the iovec over caching is disabled if KASAN is enabled. A helper will
free and clear those members in that case.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring/cmd.h   |  2 +-
 include/linux/io_uring_types.h |  3 ++-
 io_uring/alloc_cache.h         | 43 +++++++++++++++++++++++++++-------
 io_uring/futex.c               |  4 ++--
 io_uring/io_uring.c            | 12 ++++++----
 io_uring/io_uring.h            |  5 ++--
 io_uring/net.c                 | 28 +++++-----------------
 io_uring/net.h                 | 20 +++++++++-------
 io_uring/poll.c                |  2 +-
 io_uring/rw.c                  | 27 +++++----------------
 io_uring/rw.h                  | 27 ++++++++++++---------
 io_uring/uring_cmd.c           | 11 ++-------
 12 files changed, 91 insertions(+), 93 deletions(-)

diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index a3ce553413de8..abd0c8bd950ba 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -19,8 +19,8 @@ struct io_uring_cmd {
 };
 
 struct io_uring_cmd_data {
-	struct io_uring_sqe	sqes[2];
 	void			*op_data;
+	struct io_uring_sqe	sqes[2];
 };
 
 static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 623d8e798a11a..3def525a1da37 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -222,7 +222,8 @@ struct io_alloc_cache {
 	void			**entries;
 	unsigned int		nr_cached;
 	unsigned int		max_cached;
-	size_t			elem_size;
+	unsigned int		elem_size;
+	unsigned int		init_clear;
 };
 
 struct io_ring_ctx {
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index a3a8cfec32ce1..cca96aff3277e 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -6,6 +6,19 @@
  */
 #define IO_ALLOC_CACHE_MAX	128
 
+#if defined(CONFIG_KASAN)
+static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
+{
+	kfree(*iov);
+	*iov = NULL;
+	*nr = 0;
+}
+#else
+static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
+{
+}
+#endif
+
 static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
 				      void *entry)
 {
@@ -23,35 +36,47 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache)
 	if (cache->nr_cached) {
 		void *entry = cache->entries[--cache->nr_cached];
 
+		/*
+		 * If KASAN is enabled, always clear the initial bytes that
+		 * must be zeroed post alloc, in case any of them overlap
+		 * with KASAN storage.
+		 */
+#if defined(CONFIG_KASAN)
 		kasan_mempool_unpoison_object(entry, cache->elem_size);
+		if (cache->init_clear)
+			memset(entry, 0, cache->init_clear);
+#endif
 		return entry;
 	}
 
 	return NULL;
 }
 
-static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp,
-				   void (*init_once)(void *obj))
+static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
 {
-	if (unlikely(!cache->nr_cached)) {
-		void *obj = kmalloc(cache->elem_size, gfp);
+	void *obj;
 
-		if (obj && init_once)
-			init_once(obj);
+	obj = io_alloc_cache_get(cache);
+	if (obj)
 		return obj;
-	}
-	return io_alloc_cache_get(cache);
+
+	obj = kmalloc(cache->elem_size, gfp);
+	if (obj && cache->init_clear)
+		memset(obj, 0, cache->init_clear);
+	return obj;
 }
 
 /* returns false if the cache was initialized properly */
 static inline bool io_alloc_cache_init(struct io_alloc_cache *cache,
-				       unsigned max_nr, size_t size)
+				       unsigned max_nr, unsigned int size,
+				       unsigned int init_bytes)
 {
 	cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
 	if (cache->entries) {
 		cache->nr_cached = 0;
 		cache->max_cached = max_nr;
 		cache->elem_size = size;
+		cache->init_clear = init_bytes;
 		return false;
 	}
 	return true;
diff --git a/io_uring/futex.c b/io_uring/futex.c
index 30139cc150f22..3159a2b7eeca1 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -36,7 +36,7 @@ struct io_futex_data {
 bool io_futex_cache_init(struct io_ring_ctx *ctx)
 {
 	return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX,
-				sizeof(struct io_futex_data));
+				sizeof(struct io_futex_data), 0);
 }
 
 void io_futex_cache_free(struct io_ring_ctx *ctx)
@@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 	io_ring_submit_lock(ctx, issue_flags);
-	ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL);
+	ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT);
 	if (!ifd) {
 		ret = -ENOMEM;
 		goto done_unlock;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7bfbc7c223677..263e504be4a8b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
 	ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
-			    sizeof(struct async_poll));
+			    sizeof(struct async_poll), 0);
 	ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
-			    sizeof(struct io_async_msghdr));
+			    sizeof(struct io_async_msghdr),
+			    offsetof(struct io_async_msghdr, clear));
 	ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
-			    sizeof(struct io_async_rw));
+			    sizeof(struct io_async_rw),
+			    offsetof(struct io_async_rw, clear));
 	ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
-			    sizeof(struct io_uring_cmd_data));
+			    sizeof(struct io_uring_cmd_data), 0);
 	spin_lock_init(&ctx->msg_lock);
 	ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
-			    sizeof(struct io_kiocb));
+			    sizeof(struct io_kiocb), 0);
 	ret |= io_futex_cache_init(ctx);
 	if (ret)
 		goto free_ref;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index f65e3f3ede517..67adbb3c1bf58 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -226,10 +226,9 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
 }
 
 static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache,
-					      struct io_kiocb *req,
-					      void (*init_once)(void *obj))
+					      struct io_kiocb *req)
 {
-	req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once);
+	req->async_data = io_cache_alloc(cache, GFP_KERNEL);
 	if (req->async_data)
 		req->flags |= REQ_F_ASYNC_DATA;
 	return req->async_data;
diff --git a/io_uring/net.c b/io_uring/net.c
index 85f55fbc25c94..41eef286f8b9a 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_async_msghdr *hdr = req->async_data;
-	struct iovec *iov;
 
 	/* can't recycle, ensure we free the iovec if we have one */
 	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
@@ -146,39 +145,25 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 	/* Let normal cleanup path reap it if we fail adding to the cache */
-	iov = hdr->free_iov;
+	io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr);
 	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
-		if (iov)
-			kasan_mempool_poison_object(iov);
 		req->async_data = NULL;
 		req->flags &= ~REQ_F_ASYNC_DATA;
 	}
 }
 
-static void io_msg_async_data_init(void *obj)
-{
-	struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj;
-
-	hdr->free_iov = NULL;
-	hdr->free_iov_nr = 0;
-}
-
 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_async_msghdr *hdr;
 
-	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req,
-					io_msg_async_data_init);
+	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
 	if (!hdr)
 		return NULL;
 
 	/* If the async data was cached, we might have an iov cached inside. */
-	if (hdr->free_iov) {
-		kasan_mempool_unpoison_object(hdr->free_iov,
-					      hdr->free_iov_nr * sizeof(struct iovec));
+	if (hdr->free_iov)
 		req->flags |= REQ_F_NEED_CLEANUP;
-	}
 	return hdr;
 }
 
@@ -1813,11 +1798,10 @@ void io_netmsg_cache_free(const void *entry)
 {
 	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
 
-	if (kmsg->free_iov) {
-		kasan_mempool_unpoison_object(kmsg->free_iov,
-				kmsg->free_iov_nr * sizeof(struct iovec));
+#if !defined(CONFIG_KASAN)
+	if (kmsg->free_iov)
 		io_netmsg_iovec_free(kmsg);
-	}
+#endif
 	kfree(kmsg);
 }
 #endif
diff --git a/io_uring/net.h b/io_uring/net.h
index 52bfee05f06a1..b804c2b36e605 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -5,16 +5,20 @@
 
 struct io_async_msghdr {
 #if defined(CONFIG_NET)
-	struct iovec			fast_iov;
-	/* points to an allocated iov, if NULL we use fast_iov instead */
 	struct iovec			*free_iov;
+	/* points to an allocated iov, if NULL we use fast_iov instead */
 	int				free_iov_nr;
-	int				namelen;
-	__kernel_size_t			controllen;
-	__kernel_size_t			payloadlen;
-	struct sockaddr __user		*uaddr;
-	struct msghdr			msg;
-	struct sockaddr_storage		addr;
+	struct_group(clear,
+		int				namelen;
+		struct iovec			fast_iov;
+		__kernel_size_t			controllen;
+		__kernel_size_t			payloadlen;
+		struct sockaddr __user		*uaddr;
+		struct msghdr			msg;
+		struct sockaddr_storage		addr;
+	);
+#else
+	struct_group(clear);
 #endif
 };
 
diff --git a/io_uring/poll.c b/io_uring/poll.c
index cc01c40b43d31..356474c66f324 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -650,7 +650,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
 		kfree(apoll->double_poll);
 	} else {
 		if (!(issue_flags & IO_URING_F_UNLOCKED))
-			apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL);
+			apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC);
 		else
 			apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
 		if (!apoll)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index a9a2733be8420..991ecfbea88e3 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -158,16 +158,13 @@ static void io_rw_iovec_free(struct io_async_rw *rw)
 static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_async_rw *rw = req->async_data;
-	struct iovec *iov;
 
 	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
 		io_rw_iovec_free(rw);
 		return;
 	}
-	iov = rw->free_iovec;
+	io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr);
 	if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) {
-		if (iov)
-			kasan_mempool_poison_object(iov);
 		req->async_data = NULL;
 		req->flags &= ~REQ_F_ASYNC_DATA;
 	}
@@ -208,27 +205,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags)
 	}
 }
 
-static void io_rw_async_data_init(void *obj)
-{
-	struct io_async_rw *rw = (struct io_async_rw *)obj;
-
-	rw->free_iovec = NULL;
-	rw->bytes_done = 0;
-}
-
 static int io_rw_alloc_async(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_async_rw *rw;
 
-	rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init);
+	rw = io_uring_alloc_async_data(&ctx->rw_cache, req);
 	if (!rw)
 		return -ENOMEM;
-	if (rw->free_iovec) {
-		kasan_mempool_unpoison_object(rw->free_iovec,
-					      rw->free_iov_nr * sizeof(struct iovec));
+	if (rw->free_iovec)
 		req->flags |= REQ_F_NEED_CLEANUP;
-	}
 	rw->bytes_done = 0;
 	return 0;
 }
@@ -1323,10 +1309,9 @@ void io_rw_cache_free(const void *entry)
 {
 	struct io_async_rw *rw = (struct io_async_rw *) entry;
 
-	if (rw->free_iovec) {
-		kasan_mempool_unpoison_object(rw->free_iovec,
-				rw->free_iov_nr * sizeof(struct iovec));
+#if !defined(CONFIG_KASAN)
+	if (rw->free_iovec)
 		io_rw_iovec_free(rw);
-	}
+#endif
 	kfree(rw);
 }
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 2d7656bd268d6..eaa59bd648709 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -9,19 +9,24 @@ struct io_meta_state {
 
 struct io_async_rw {
 	size_t				bytes_done;
-	struct iov_iter			iter;
-	struct iov_iter_state		iter_state;
-	struct iovec			fast_iov;
 	struct iovec			*free_iovec;
-	int				free_iov_nr;
-	/* wpq is for buffered io, while meta fields are used with direct io */
-	union {
-		struct wait_page_queue		wpq;
-		struct {
-			struct uio_meta			meta;
-			struct io_meta_state		meta_state;
+	struct_group(clear,
+		struct iov_iter			iter;
+		struct iov_iter_state		iter_state;
+		struct iovec			fast_iov;
+		int				free_iov_nr;
+		/*
+		 * wpq is for buffered io, while meta fields are used with
+		 * direct io
+		 */
+		union {
+			struct wait_page_queue		wpq;
+			struct {
+				struct uio_meta			meta;
+				struct io_meta_state		meta_state;
+			};
 		};
-	};
+	);
 };
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 6a63ec4b54456..1f6a82128b475 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 
-static void io_uring_cmd_init_once(void *obj)
-{
-	struct io_uring_cmd_data *data = obj;
-
-	data->op_data = NULL;
-}	
-
 static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 				   const struct io_uring_sqe *sqe)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 	struct io_uring_cmd_data *cache;
 
-	cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req,
-			io_uring_cmd_init_once);
+	cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req);
 	if (!cache)
 		return -ENOMEM;
+	cache->op_data = NULL;
 
 	if (!(req->flags & REQ_F_FORCE_ASYNC)) {
 		/* defer memcpy until we need it */

From ff74954e4e9374f24b95dd46ef0bb1b5fa0a46f2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 23 Jan 2025 07:34:36 -0700
Subject: [PATCH 170/368] io_uring/alloc_cache: get rid of _nocache() helper

Just allow passing in NULL for the cache, if the type in question
doesn't have a cache associated with it.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.h | 18 +++++++-----------
 io_uring/timeout.c  |  2 +-
 io_uring/waitid.c   |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 67adbb3c1bf58..ab619e63ef39c 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -228,18 +228,14 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
 static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache,
 					      struct io_kiocb *req)
 {
-	req->async_data = io_cache_alloc(cache, GFP_KERNEL);
-	if (req->async_data)
-		req->flags |= REQ_F_ASYNC_DATA;
-	return req->async_data;
-}
+	if (cache) {
+		req->async_data = io_cache_alloc(cache, GFP_KERNEL);
+	} else {
+		const struct io_issue_def *def = &io_issue_defs[req->opcode];
 
-static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req)
-{
-	const struct io_issue_def *def = &io_issue_defs[req->opcode];
-
-	WARN_ON_ONCE(!def->async_size);
-	req->async_data = kmalloc(def->async_size, GFP_KERNEL);
+		WARN_ON_ONCE(!def->async_size);
+		req->async_data = kmalloc(def->async_size, GFP_KERNEL);
+	}
 	if (req->async_data)
 		req->flags |= REQ_F_ASYNC_DATA;
 	return req->async_data;
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 2bd7e0a317bba..48fc8cf707843 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -544,7 +544,7 @@ static int __io_timeout_prep(struct io_kiocb *req,
 
 	if (WARN_ON_ONCE(req_has_async_data(req)))
 		return -EFAULT;
-	data = io_uring_alloc_async_data_nocache(req);
+	data = io_uring_alloc_async_data(NULL, req);
 	if (!data)
 		return -ENOMEM;
 	data->req = req;
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 6778c0ee76c42..853e97a7b0ecb 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -303,7 +303,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_waitid_async *iwa;
 	int ret;
 
-	iwa = io_uring_alloc_async_data_nocache(req);
+	iwa = io_uring_alloc_async_data(NULL, req);
 	if (!iwa)
 		return -ENOMEM;
 

From 27af31e44949fa85550176520ef7086a0d00fd7b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Jan 2025 18:07:45 +0200
Subject: [PATCH 171/368] hrtimers: Mark is_migration_base() with
 __always_inline

When is_migration_base() is unused, it prevents kernel builds
with clang, `make W=1` and CONFIG_WERROR=y:

kernel/time/hrtimer.c:156:20: error: unused function 'is_migration_base' [-Werror,-Wunused-function]
  156 | static inline bool is_migration_base(struct hrtimer_clock_base *base)
      |                    ^~~~~~~~~~~~~~~~~

Fix this by marking it with __always_inline.

[ tglx: Use __always_inline instead of __maybe_unused and move it into the
  	usage sites conditional ]

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250116160745.243358-1-andriy.shevchenko@linux.intel.com
---
 kernel/time/hrtimer.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index f6d8df94045c9..4fb81f8c6f1c7 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -145,11 +145,6 @@ static struct hrtimer_cpu_base migration_cpu_base = {
 
 #define migration_base	migration_cpu_base.clock_base[0]
 
-static inline bool is_migration_base(struct hrtimer_clock_base *base)
-{
-	return base == &migration_base;
-}
-
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
@@ -275,11 +270,6 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 
 #else /* CONFIG_SMP */
 
-static inline bool is_migration_base(struct hrtimer_clock_base *base)
-{
-	return false;
-}
-
 static inline struct hrtimer_clock_base *
 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 	__acquires(&timer->base->cpu_base->lock)
@@ -1370,6 +1360,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
 	}
 }
 
+#ifdef CONFIG_SMP
+static __always_inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return base == &migration_base;
+}
+#else
+static __always_inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return false;
+}
+#endif
+
 /*
  * This function is called on PREEMPT_RT kernels when the fast path
  * deletion of a timer failed because the timer callback function was

From 53dac345395c0d2493cbc2f4c85fe38aef5b63f5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Sat, 18 Jan 2025 00:24:33 +0100
Subject: [PATCH 172/368] hrtimers: Force migrate away hrtimers queued after
 CPUHP_AP_HRTIMERS_DYING

hrtimers are migrated away from the dying CPU to any online target at
the CPUHP_AP_HRTIMERS_DYING stage in order not to delay bandwidth timers
handling tasks involved in the CPU hotplug forward progress.

However wakeups can still be performed by the outgoing CPU after
CPUHP_AP_HRTIMERS_DYING. Those can result again in bandwidth timers being
armed. Depending on several considerations (crystal ball power management
based election, earliest timer already enqueued, timer migration enabled or
not), the target may eventually be the current CPU even if offline. If that
happens, the timer is eventually ignored.

The most notable example is RCU which had to deal with each and every one of
those wake-ups by deferring them to an online CPU, along with related
workarounds:

_ e787644caf76 (rcu: Defer RCU kthreads wakeup when CPU is dying)
_ 9139f93209d1 (rcu/nocb: Fix RT throttling hrtimer armed from offline CPU)
_ f7345ccc62a4 (rcu/nocb: Fix rcuog wake-up from offline softirq)

The problem isn't confined to RCU though as the stop machine kthread
(which runs CPUHP_AP_HRTIMERS_DYING) reports its completion at the end
of its work through cpu_stop_signal_done() and performs a wake up that
eventually arms the deadline server timer:

   WARNING: CPU: 94 PID: 588 at kernel/time/hrtimer.c:1086 hrtimer_start_range_ns+0x289/0x2d0
   CPU: 94 UID: 0 PID: 588 Comm: migration/94 Not tainted
   Stopper: multi_cpu_stop+0x0/0x120 <- stop_machine_cpuslocked+0x66/0xc0
   RIP: 0010:hrtimer_start_range_ns+0x289/0x2d0
   Call Trace:
   <TASK>
     start_dl_timer
     enqueue_dl_entity
     dl_server_start
     enqueue_task_fair
     enqueue_task
     ttwu_do_activate
     try_to_wake_up
     complete
     cpu_stopper_thread

Instead of providing yet another bandaid to work around the situation, fix
it in the hrtimers infrastructure instead: always migrate away a timer to
an online target whenever it is enqueued from an offline CPU.

This will also allow reverting all the above RCU disgraceful hacks.

Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
Reported-by: Vlad Poenaru <vlad.wing@gmail.com>
Reported-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/all/20250117232433.24027-1-frederic@kernel.org
Closes: 20241213203739.1519801-1-usamaarif642@gmail.com
---
 include/linux/hrtimer_defs.h |   1 +
 kernel/time/hrtimer.c        | 103 ++++++++++++++++++++++++++++-------
 2 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index c3b4b7ed7c163..84a5045f80f36 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -125,6 +125,7 @@ struct hrtimer_cpu_base {
 	ktime_t				softirq_expires_next;
 	struct hrtimer			*softirq_next_timer;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
+	call_single_data_t		csd;
 } ____cacheline_aligned;
 
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 4fb81f8c6f1c7..deb1aa32814e3 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -58,6 +58,8 @@
 #define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
 #define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
 
+static void retrigger_next_event(void *arg);
+
 /*
  * The timer bases:
  *
@@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 			.clockid = CLOCK_TAI,
 			.get_time = &ktime_get_clocktai,
 		},
-	}
+	},
+	.csd = CSD_INIT(retrigger_next_event, NULL)
 };
 
 static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
@@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
 };
 
+static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base)
+{
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+		return true;
+	else
+		return likely(base->online);
+}
+
 /*
  * Functions and macros which are different for UP/SMP systems are kept in a
  * single place
@@ -178,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 }
 
 /*
- * We do not migrate the timer when it is expiring before the next
- * event on the target cpu. When high resolution is enabled, we cannot
- * reprogram the target cpu hardware and we would cause it to fire
- * late. To keep it simple, we handle the high resolution enabled and
- * disabled case similar.
+ * Check if the elected target is suitable considering its next
+ * event and the hotplug state of the current CPU.
+ *
+ * If the elected target is remote and its next event is after the timer
+ * to queue, then a remote reprogram is necessary. However there is no
+ * guarantee the IPI handling the operation would arrive in time to meet
+ * the high resolution deadline. In this case the local CPU becomes a
+ * preferred target, unless it is offline.
+ *
+ * High and low resolution modes are handled the same way for simplicity.
  *
  * Called with cpu_base->lock of target cpu held.
  */
-static int
-hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base,
+				    struct hrtimer_cpu_base *new_cpu_base,
+				    struct hrtimer_cpu_base *this_cpu_base)
 {
 	ktime_t expires;
 
+	/*
+	 * The local CPU clockevent can be reprogrammed. Also get_target_base()
+	 * guarantees it is online.
+	 */
+	if (new_cpu_base == this_cpu_base)
+		return true;
+
+	/*
+	 * The offline local CPU can't be the default target if the
+	 * next remote target event is after this timer. Keep the
+	 * elected new base. An IPI will we issued to reprogram
+	 * it as a last resort.
+	 */
+	if (!hrtimer_base_is_online(this_cpu_base))
+		return true;
+
 	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
-	return expires < new_base->cpu_base->expires_next;
+
+	return expires >= new_base->cpu_base->expires_next;
 }
 
-static inline
-struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
-					 int pinned)
+static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned)
 {
+	if (!hrtimer_base_is_online(base)) {
+		int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER));
+
+		return &per_cpu(hrtimer_bases, cpu);
+	}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 	if (static_branch_likely(&timers_migration_enabled) && !pinned)
 		return &per_cpu(hrtimer_bases, get_nohz_timer_target());
@@ -249,8 +287,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 		raw_spin_unlock(&base->cpu_base->lock);
 		raw_spin_lock(&new_base->cpu_base->lock);
 
-		if (new_cpu_base != this_cpu_base &&
-		    hrtimer_check_target(timer, new_base)) {
+		if (!hrtimer_suitable_target(timer, new_base, new_cpu_base,
+					     this_cpu_base)) {
 			raw_spin_unlock(&new_base->cpu_base->lock);
 			raw_spin_lock(&base->cpu_base->lock);
 			new_cpu_base = this_cpu_base;
@@ -259,8 +297,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 		}
 		WRITE_ONCE(timer->base, new_base);
 	} else {
-		if (new_cpu_base != this_cpu_base &&
-		    hrtimer_check_target(timer, new_base)) {
+		if (!hrtimer_suitable_target(timer, new_base,  new_cpu_base, this_cpu_base)) {
 			new_cpu_base = this_cpu_base;
 			goto again;
 		}
@@ -706,8 +743,6 @@ static inline int hrtimer_is_hres_enabled(void)
 	return hrtimer_hres_enabled;
 }
 
-static void retrigger_next_event(void *arg);
-
 /*
  * Switch to high resolution mode
  */
@@ -1195,6 +1230,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 				    u64 delta_ns, const enum hrtimer_mode mode,
 				    struct hrtimer_clock_base *base)
 {
+	struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases);
 	struct hrtimer_clock_base *new_base;
 	bool force_local, first;
 
@@ -1206,9 +1242,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	 * and enforce reprogramming after it is queued no matter whether
 	 * it is the new first expiring timer again or not.
 	 */
-	force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+	force_local = base->cpu_base == this_cpu_base;
 	force_local &= base->cpu_base->next_timer == timer;
 
+	/*
+	 * Don't force local queuing if this enqueue happens on a unplugged
+	 * CPU after hrtimer_cpu_dying() has been invoked.
+	 */
+	force_local &= this_cpu_base->online;
+
 	/*
 	 * Remove an active timer from the queue. In case it is not queued
 	 * on the current CPU, make sure that remove_hrtimer() updates the
@@ -1238,8 +1280,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	}
 
 	first = enqueue_hrtimer(timer, new_base, mode);
-	if (!force_local)
-		return first;
+	if (!force_local) {
+		/*
+		 * If the current CPU base is online, then the timer is
+		 * never queued on a remote CPU if it would be the first
+		 * expiring timer there.
+		 */
+		if (hrtimer_base_is_online(this_cpu_base))
+			return first;
+
+		/*
+		 * Timer was enqueued remote because the current base is
+		 * already offline. If the timer is the first to expire,
+		 * kick the remote CPU to reprogram the clock event.
+		 */
+		if (first) {
+			struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base;
+
+			smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd);
+		}
+		return 0;
+	}
 
 	/*
 	 * Timer was forced to stay on the current CPU to avoid

From 8f62ca9c338aae4f73e9ce0221c3d4668359ddd8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 23 Jan 2025 14:22:02 +0100
Subject: [PATCH 173/368] ACPI: x86: Add skip i2c clients quirk for Vexia EDU
 ATLA 10 tablet 5V

The Vexia EDU ATLA 10 tablet comes in 2 different versions with
significantly different mainboards. The only outward difference is that
the charging barrel on one is marked 5V and the other is marked 9V.

Both ship with Android 4.4 as factory OS and have the usual broken DSDT
issues for x86 Android tablets.

Add a quirk to skip ACPI I2C client enumeration for the 5V version to
complement the existing quirk for the 9V version.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patch.msgid.link/20250123132202.18209-1-hdegoede@redhat.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index cb45ef5240dab..068c1612660bc 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -407,6 +407,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
 					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
 	},
+	{
+		/* Vexia Edu Atla 10 tablet 5V version */
+		.matches = {
+			/* Having all 3 of these not set is somewhat unique */
+			DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."),
+			DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."),
+			/* Above strings are too generic, also match on BIOS date */
+			DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+	},
 	{
 		/* Vexia Edu Atla 10 tablet 9V version */
 		.matches = {

From 43855ac61483cb914f060851535ea753c094b3e0 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 22 Jan 2025 11:36:16 +0530
Subject: [PATCH 174/368] cpufreq: s3c64xx: Fix compilation warning

The driver generates the following warnings when regulator support isn't
enabled in the kernel. Fix it.

   drivers/cpufreq/s3c64xx-cpufreq.c: In function 's3c64xx_cpufreq_set_target':
>> drivers/cpufreq/s3c64xx-cpufreq.c:55:22: warning: variable 'old_freq' set but not used [-Wunused-but-set-variable]
      55 |         unsigned int old_freq, new_freq;
         |                      ^~~~~~~~
>> drivers/cpufreq/s3c64xx-cpufreq.c:54:30: warning: variable 'dvfs' set but not used [-Wunused-but-set-variable]
      54 |         struct s3c64xx_dvfs *dvfs;
         |                              ^~~~

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501191803.CtfT7b2o-lkp@intel.com/
Cc: 5.4+ <stable@vger.kernel.org> # v5.4+
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/236b227e929e5adc04d1e9e7af6845a46c8e9432.1737525916.git.viresh.kumar@linaro.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/s3c64xx-cpufreq.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c
index c6bdfc308e990..9cef715280762 100644
--- a/drivers/cpufreq/s3c64xx-cpufreq.c
+++ b/drivers/cpufreq/s3c64xx-cpufreq.c
@@ -24,6 +24,7 @@ struct s3c64xx_dvfs {
 	unsigned int vddarm_max;
 };
 
+#ifdef CONFIG_REGULATOR
 static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
 	[0] = { 1000000, 1150000 },
 	[1] = { 1050000, 1150000 },
@@ -31,6 +32,7 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
 	[3] = { 1200000, 1350000 },
 	[4] = { 1300000, 1350000 },
 };
+#endif
 
 static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
 	{ 0, 0,  66000 },
@@ -51,15 +53,16 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
 static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
 				      unsigned int index)
 {
-	struct s3c64xx_dvfs *dvfs;
-	unsigned int old_freq, new_freq;
+	unsigned int new_freq = s3c64xx_freq_table[index].frequency;
 	int ret;
 
+#ifdef CONFIG_REGULATOR
+	struct s3c64xx_dvfs *dvfs;
+	unsigned int old_freq;
+
 	old_freq = clk_get_rate(policy->clk) / 1000;
-	new_freq = s3c64xx_freq_table[index].frequency;
 	dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data];
 
-#ifdef CONFIG_REGULATOR
 	if (vddarm && new_freq > old_freq) {
 		ret = regulator_set_voltage(vddarm,
 					    dvfs->vddarm_min,

From 1608f0230510489d74a2e24e47054233b7e4678a Mon Sep 17 00:00:00 2001
From: Lifeng Zheng <zhenglifeng1@huawei.com>
Date: Fri, 17 Jan 2025 18:14:54 +0800
Subject: [PATCH 175/368] cpufreq: Fix re-boost issue after hotplugging a CPU

It turns out that CPUX will stay on the base frequency after performing
these operations:

 1. boost all CPUs: echo 1 > /sys/devices/system/cpu/cpufreq/boost

 2. offline one CPU: echo 0 > /sys/devices/system/cpu/cpuX/online

 3. deboost all CPUs: echo 0 > /sys/devices/system/cpu/cpufreq/boost

 4. online CPUX: echo 1 > /sys/devices/system/cpu/cpuX/online

 5. boost all CPUs again: echo 1 > /sys/devices/system/cpu/cpufreq/boost

This is because max_freq_req of the policy is not updated during the
online process, and the value of max_freq_req before the last offline is
retained.

When the CPU is boosted again, freq_qos_update_request() will do nothing
because the old value is the same as the new one. This causes the CPU to
stay at the base frequency. Updating max_freq_req in cpufreq_online()
will solve this problem.

Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/20250117101457.1530653-2-zhenglifeng1@huawei.com
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1076e37a18ad0..4d54e30f94ee1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1476,6 +1476,10 @@ static int cpufreq_online(unsigned int cpu)
 
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				CPUFREQ_CREATE_POLICY, policy);
+	} else {
+		ret = freq_qos_update_request(policy->max_freq_req, policy->max);
+		if (ret < 0)
+			goto out_destroy_policy;
 	}
 
 	if (cpufreq_driver->get && has_target()) {

From dd016f379ebc2d43a9405742d1a6066577509bd7 Mon Sep 17 00:00:00 2001
From: Lifeng Zheng <zhenglifeng1@huawei.com>
Date: Fri, 17 Jan 2025 18:14:55 +0800
Subject: [PATCH 176/368] cpufreq: Introduce a more generic way to set default
 per-policy boost flag

In cpufreq_online() of cpufreq.c, the per-policy boost flag is already
set to mirror the cpufreq_driver boost during init, but freq_table is
used to judge whether the policy has a boost frequency. There are two
drawbacks to this approach:

 1. It doesn't work for the cpufreq drivers that do not use a frequency
    table. For now, acpi-cpufreq and amd-pstate have to enable boost in
    policy initialization. And cppc_cpufreq never sets the policy to
    boost when going online, no matter what the cpufreq_driver boost
    flag is.

 2. If the CPU goes offline when cpufreq_driver boost is enabled and
    then goes online when cpufreq_driver boost is disabled, the
    per-policy boost flag will incorrectly remain true.

Running set_boost at the end of the online process is a more generic way
for all cpufreq drivers.

Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Link: https://patch.msgid.link/20250117101457.1530653-3-zhenglifeng1@huawei.com
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4d54e30f94ee1..e0048856eceee 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1410,10 +1410,6 @@ static int cpufreq_online(unsigned int cpu)
 			goto out_free_policy;
 		}
 
-		/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
-		if (cpufreq_boost_enabled() && policy_has_boost_freq(policy))
-			policy->boost_enabled = true;
-
 		/*
 		 * The initialization has succeeded and the policy is online.
 		 * If there is a problem with its frequency table, take it
@@ -1574,6 +1570,18 @@ static int cpufreq_online(unsigned int cpu)
 	if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
 		policy->cdev = of_cpufreq_cooling_register(policy);
 
+	/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
+	if (policy->boost_enabled != cpufreq_boost_enabled()) {
+		policy->boost_enabled = cpufreq_boost_enabled();
+		ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
+		if (ret) {
+			/* If the set_boost fails, the online operation is not affected */
+			pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu,
+				policy->boost_enabled ? "enable" : "disable");
+			policy->boost_enabled = !policy->boost_enabled;
+		}
+	}
+
 	pr_debug("initialization complete\n");
 
 	return 0;

From 03d8b4e76266e11662c5e544854b737843173e2d Mon Sep 17 00:00:00 2001
From: Lifeng Zheng <zhenglifeng1@huawei.com>
Date: Fri, 17 Jan 2025 18:14:56 +0800
Subject: [PATCH 177/368] cpufreq: CPPC: Fix wrong max_freq in policy
 initialization

In policy initialization, policy->max and policy->cpuinfo.max_freq are
always set to the value calculated from caps->nominal_perf.

This will cause the frequency to stay at the base frequency even if the
policy is already boosted when a CPU is going online.

Fix this by using policy->boost_enabled to determine which value should
be set.

Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/20250117101457.1530653-4-zhenglifeng1@huawei.com
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cppc_cpufreq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 2486a6c5256ae..8f512448382f4 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -611,7 +611,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
 	 */
 	policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
-	policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
+	policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ?
+						caps->highest_perf : caps->nominal_perf);
 
 	/*
 	 * Set cpuinfo.min_freq to Lowest to make the full range of performance
@@ -619,7 +620,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 * nonlinear perf
 	 */
 	policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf);
-	policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
+	policy->cpuinfo.max_freq = policy->max;
 
 	policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
 	policy->shared_type = cpu_data->shared_type;

From 2b16c631832df6cf8782fb1fdc7df8a4f03f4f16 Mon Sep 17 00:00:00 2001
From: Lifeng Zheng <zhenglifeng1@huawei.com>
Date: Fri, 17 Jan 2025 18:14:57 +0800
Subject: [PATCH 178/368] cpufreq: ACPI: Remove set_boost in
 acpi_cpufreq_cpu_init()

At the end of cpufreq_online() in cpufreq.c, set_boost is executed and
the per-policy boost flag is set to mirror the cpufreq_driver boost, so
it is not necessary to run set_boost in acpi_cpufreq_cpu_init().

Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/20250117101457.1530653-5-zhenglifeng1@huawei.com
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 302df42d68875..463b69a2dff52 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -909,11 +909,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
 		pr_warn(FW_WARN "P-state 0 is not max freq\n");
 
-	if (acpi_cpufreq_driver.set_boost) {
-		set_boost(policy, acpi_cpufreq_driver.boost_enabled);
-		policy->boost_enabled = acpi_cpufreq_driver.boost_enabled;
-	}
-
 	return result;
 
 err_unreg:

From 93940fbdc46843cea58708bbe8dd225bd0f32e67 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Mon, 20 Jan 2025 10:09:46 +0000
Subject: [PATCH 179/368] cpufreq/schedutil: Only bind threads if needed

Remove the unconditional binding of sugov kthreads to the affected CPUs
if the cpufreq driver indicates that updates can happen from any CPU.
This allows userspace to set affinities to either save power (waking up
bigger CPUs on HMP can be expensive) or increase performance (by
letting the utilized CPUs run without preemption of the sugov kthread).

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://patch.msgid.link/5a8deed4-7764-4729-a9d4-9520c25fa7e8@arm.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 6 +++++-
 kernel/sched/syscalls.c          | 7 +++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a2a29e3fffcaa..1a19d69b91ed3 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -666,7 +666,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
 	}
 
 	sg_policy->thread = thread;
-	kthread_bind_mask(thread, policy->related_cpus);
+	if (policy->dvfs_possible_from_any_cpu)
+		set_cpus_allowed_ptr(thread, policy->related_cpus);
+	else
+		kthread_bind_mask(thread, policy->related_cpus);
+
 	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
 	mutex_init(&sg_policy->work_lock);
 
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 149e2c8036d36..456d339be98fb 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1129,6 +1129,13 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
 	if (!task_has_dl_policy(p) || !dl_bandwidth_enabled())
 		return 0;
 
+	/*
+	 * The special/sugov task isn't part of regular bandwidth/admission
+	 * control so let userspace change affinities.
+	 */
+	if (dl_entity_is_special(&p->dl))
+		return 0;
+
 	/*
 	 * Since bandwidth control happens on root_domain basis,
 	 * if admission test is enabled, we only admit -deadline

From e20a70c572539a486dbd91b225fa6a194a5e2122 Mon Sep 17 00:00:00 2001
From: Wentao Liang <vulab@iscas.ac.cn>
Date: Sun, 19 Jan 2025 22:32:05 +0800
Subject: [PATCH 180/368] PM: hibernate: Add error handling for
 syscore_suspend()

In hibernation_platform_enter(), the code did not check the
return value of syscore_suspend(), potentially leading to a
situation where syscore_resume() would be called even if
syscore_suspend() failed. This could cause unpredictable
behavior or system instability.

Modify the code sequence in question to properly handle errors returned
by syscore_suspend(). If an error occurs in the suspend path, the code
now jumps to label 'Enable_irqs' skipping the syscore_resume() call and
only enabling interrupts after setting the system state to SYSTEM_RUNNING.

Fixes: 40dc166cb5dd ("PM / Core: Introduce struct syscore_ops for core subsystems PM")
Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
Link: https://patch.msgid.link/20250119143205.2103-1-vulab@iscas.ac.cn
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/hibernate.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1f87aa01ba44f..10a01af63a807 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -608,7 +608,11 @@ int hibernation_platform_enter(void)
 
 	local_irq_disable();
 	system_state = SYSTEM_SUSPEND;
-	syscore_suspend();
+
+	error = syscore_suspend();
+	if (error)
+		goto Enable_irqs;
+
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
 		goto Power_up;
@@ -620,6 +624,7 @@ int hibernation_platform_enter(void)
 
  Power_up:
 	syscore_resume();
+ Enable_irqs:
 	system_state = SYSTEM_RUNNING;
 	local_irq_enable();
 

From 4891cd3eba62ac611a7929948cf5588a1abed909 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Jan 2025 17:43:29 +0200
Subject: [PATCH 181/368] PM: Revert "Add EXPORT macros for exporting PM
 functions"

Revert commit 41a337b40e98 ("Add EXPORT macros for exporting PM
functions") because the macros added by it are still unused almost
two years after they had been introduced.

Reported-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20250116154354.149297-1-andriy.shevchenko@linux.intel.com
[ rjw: New changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/pm.h b/include/linux/pm.h
index e7f0260f15ad5..0627a795892be 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \
 
 #ifdef CONFIG_PM
 #define _EXPORT_DEV_PM_OPS(name, license, ns)		_EXPORT_PM_OPS(name, license, ns)
-#define EXPORT_PM_FN_GPL(name)				EXPORT_SYMBOL_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns)			EXPORT_SYMBOL_NS_GPL(name, "ns")
 #else
 #define _EXPORT_DEV_PM_OPS(name, license, ns)		_DISCARD_PM_OPS(name, license, ns)
-#define EXPORT_PM_FN_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns)
 #endif
 
 #ifdef CONFIG_PM_SLEEP

From a216542027b892e6651c1b4e076012140d04afaf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 10 Jan 2025 15:22:24 +0000
Subject: [PATCH 182/368] btrfs: fix lockdep splat while merging a relocation
 root

When COWing a relocation tree path, at relocation.c:replace_path(), we
can trigger a lockdep splat while we are in the btrfs_search_slot() call
against the relocation root. This happens in that callchain at
ctree.c:read_block_for_search() when we happen to find a child extent
buffer already loaded through the fs tree with a lockdep class set to
the fs tree. So when we attempt to lock that extent buffer through a
relocation tree we have to reset the lockdep class to the class for a
relocation tree, since a relocation tree has extent buffers that used
to belong to a fs tree and may currently be already loaded (we swap
extent buffers between the two trees at the end of replace_path()).

However we are missing calls to btrfs_maybe_reset_lockdep_class() to reset
the lockdep class at ctree.c:read_block_for_search() before we read lock
an extent buffer, just like we did for btrfs_search_slot() in commit
b40130b23ca4 ("btrfs: fix lockdep splat with reloc root extent buffers").

So add the missing btrfs_maybe_reset_lockdep_class() calls before the
attempts to read lock an extent buffer at ctree.c:read_block_for_search().

The lockdep splat was reported by syzbot and it looks like this:

   ======================================================
   WARNING: possible circular locking dependency detected
   6.13.0-rc5-syzkaller-00163-gab75170520d4 #0 Not tainted
   ------------------------------------------------------
   syz.0.0/5335 is trying to acquire lock:
   ffff8880545dbc38 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock_nested+0x2f/0x250 fs/btrfs/locking.c:146

   but task is already holding lock:
   ffff8880545dba58 (btrfs-treloc-02/1){+.+.}-{4:4}, at: btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189

   which lock already depends on the new lock.

   the existing dependency chain (in reverse order) is:

   -> #2 (btrfs-treloc-02/1){+.+.}-{4:4}:
          reacquire_held_locks+0x3eb/0x690 kernel/locking/lockdep.c:5374
          __lock_release kernel/locking/lockdep.c:5563 [inline]
          lock_release+0x396/0xa30 kernel/locking/lockdep.c:5870
          up_write+0x79/0x590 kernel/locking/rwsem.c:1629
          btrfs_force_cow_block+0x14b3/0x1fd0 fs/btrfs/ctree.c:660
          btrfs_cow_block+0x371/0x830 fs/btrfs/ctree.c:755
          btrfs_search_slot+0xc01/0x3180 fs/btrfs/ctree.c:2153
          replace_path+0x1243/0x2740 fs/btrfs/relocation.c:1224
          merge_reloc_root+0xc46/0x1ad0 fs/btrfs/relocation.c:1692
          merge_reloc_roots+0x3b3/0x980 fs/btrfs/relocation.c:1942
          relocate_block_group+0xb0a/0xd40 fs/btrfs/relocation.c:3754
          btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4087
          btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3494
          __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4278
          btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4655
          btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670
          vfs_ioctl fs/ioctl.c:51 [inline]
          __do_sys_ioctl fs/ioctl.c:906 [inline]
          __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892
          do_syscall_x64 arch/x86/entry/common.c:52 [inline]
          do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
          entry_SYSCALL_64_after_hwframe+0x77/0x7f

   -> #1 (btrfs-tree-01/1){+.+.}-{4:4}:
          lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849
          down_write_nested+0xa2/0x220 kernel/locking/rwsem.c:1693
          btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189
          btrfs_init_new_buffer fs/btrfs/extent-tree.c:5052 [inline]
          btrfs_alloc_tree_block+0x41c/0x1440 fs/btrfs/extent-tree.c:5132
          btrfs_force_cow_block+0x526/0x1fd0 fs/btrfs/ctree.c:573
          btrfs_cow_block+0x371/0x830 fs/btrfs/ctree.c:755
          btrfs_search_slot+0xc01/0x3180 fs/btrfs/ctree.c:2153
          btrfs_insert_empty_items+0x9c/0x1a0 fs/btrfs/ctree.c:4351
          btrfs_insert_empty_item fs/btrfs/ctree.h:688 [inline]
          btrfs_insert_inode_ref+0x2bb/0xf80 fs/btrfs/inode-item.c:330
          btrfs_rename_exchange fs/btrfs/inode.c:7990 [inline]
          btrfs_rename2+0xcb7/0x2b90 fs/btrfs/inode.c:8374
          vfs_rename+0xbdb/0xf00 fs/namei.c:5067
          do_renameat2+0xd94/0x13f0 fs/namei.c:5224
          __do_sys_renameat2 fs/namei.c:5258 [inline]
          __se_sys_renameat2 fs/namei.c:5255 [inline]
          __x64_sys_renameat2+0xce/0xe0 fs/namei.c:5255
          do_syscall_x64 arch/x86/entry/common.c:52 [inline]
          do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
          entry_SYSCALL_64_after_hwframe+0x77/0x7f

   -> #0 (btrfs-tree-01){++++}-{4:4}:
          check_prev_add kernel/locking/lockdep.c:3161 [inline]
          check_prevs_add kernel/locking/lockdep.c:3280 [inline]
          validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904
          __lock_acquire+0x1397/0x2100 kernel/locking/lockdep.c:5226
          lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849
          down_read_nested+0xb5/0xa50 kernel/locking/rwsem.c:1649
          btrfs_tree_read_lock_nested+0x2f/0x250 fs/btrfs/locking.c:146
          btrfs_tree_read_lock fs/btrfs/locking.h:188 [inline]
          read_block_for_search+0x718/0xbb0 fs/btrfs/ctree.c:1610
          btrfs_search_slot+0x1274/0x3180 fs/btrfs/ctree.c:2237
          replace_path+0x1243/0x2740 fs/btrfs/relocation.c:1224
          merge_reloc_root+0xc46/0x1ad0 fs/btrfs/relocation.c:1692
          merge_reloc_roots+0x3b3/0x980 fs/btrfs/relocation.c:1942
          relocate_block_group+0xb0a/0xd40 fs/btrfs/relocation.c:3754
          btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4087
          btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3494
          __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4278
          btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4655
          btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670
          vfs_ioctl fs/ioctl.c:51 [inline]
          __do_sys_ioctl fs/ioctl.c:906 [inline]
          __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892
          do_syscall_x64 arch/x86/entry/common.c:52 [inline]
          do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
          entry_SYSCALL_64_after_hwframe+0x77/0x7f

   other info that might help us debug this:

   Chain exists of:
     btrfs-tree-01 --> btrfs-tree-01/1 --> btrfs-treloc-02/1

    Possible unsafe locking scenario:

          CPU0                    CPU1
          ----                    ----
     lock(btrfs-treloc-02/1);
                                  lock(btrfs-tree-01/1);
                                  lock(btrfs-treloc-02/1);
     rlock(btrfs-tree-01);

    *** DEADLOCK ***

   8 locks held by syz.0.0/5335:
    #0: ffff88801e3ae420 (sb_writers#13){.+.+}-{0:0}, at: mnt_want_write_file+0x5e/0x200 fs/namespace.c:559
    #1: ffff888052c760d0 (&fs_info->reclaim_bgs_lock){+.+.}-{4:4}, at: __btrfs_balance+0x4c2/0x26b0 fs/btrfs/volumes.c:4183
    #2: ffff888052c74850 (&fs_info->cleaner_mutex){+.+.}-{4:4}, at: btrfs_relocate_block_group+0x775/0xd90 fs/btrfs/relocation.c:4086
    #3: ffff88801e3ae610 (sb_internal#2){.+.+}-{0:0}, at: merge_reloc_root+0xf11/0x1ad0 fs/btrfs/relocation.c:1659
    #4: ffff888052c76470 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x405/0xda0 fs/btrfs/transaction.c:288
    #5: ffff888052c76498 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x405/0xda0 fs/btrfs/transaction.c:288
    #6: ffff8880545db878 (btrfs-tree-01/1){+.+.}-{4:4}, at: btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189
    #7: ffff8880545dba58 (btrfs-treloc-02/1){+.+.}-{4:4}, at: btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189

   stack backtrace:
   CPU: 0 UID: 0 PID: 5335 Comm: syz.0.0 Not tainted 6.13.0-rc5-syzkaller-00163-gab75170520d4 #0
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
   Call Trace:
    <TASK>
    __dump_stack lib/dump_stack.c:94 [inline]
    dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
    print_circular_bug+0x13a/0x1b0 kernel/locking/lockdep.c:2074
    check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2206
    check_prev_add kernel/locking/lockdep.c:3161 [inline]
    check_prevs_add kernel/locking/lockdep.c:3280 [inline]
    validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904
    __lock_acquire+0x1397/0x2100 kernel/locking/lockdep.c:5226
    lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849
    down_read_nested+0xb5/0xa50 kernel/locking/rwsem.c:1649
    btrfs_tree_read_lock_nested+0x2f/0x250 fs/btrfs/locking.c:146
    btrfs_tree_read_lock fs/btrfs/locking.h:188 [inline]
    read_block_for_search+0x718/0xbb0 fs/btrfs/ctree.c:1610
    btrfs_search_slot+0x1274/0x3180 fs/btrfs/ctree.c:2237
    replace_path+0x1243/0x2740 fs/btrfs/relocation.c:1224
    merge_reloc_root+0xc46/0x1ad0 fs/btrfs/relocation.c:1692
    merge_reloc_roots+0x3b3/0x980 fs/btrfs/relocation.c:1942
    relocate_block_group+0xb0a/0xd40 fs/btrfs/relocation.c:3754
    btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4087
    btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3494
    __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4278
    btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4655
    btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670
    vfs_ioctl fs/ioctl.c:51 [inline]
    __do_sys_ioctl fs/ioctl.c:906 [inline]
    __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892
    do_syscall_x64 arch/x86/entry/common.c:52 [inline]
    do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
    entry_SYSCALL_64_after_hwframe+0x77/0x7f
   RIP: 0033:0x7f1ac6985d29
   Code: ff ff c3 (...)
   RSP: 002b:00007f1ac63fe038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
   RAX: ffffffffffffffda RBX: 00007f1ac6b76160 RCX: 00007f1ac6985d29
   RDX: 0000000020000180 RSI: 00000000c4009420 RDI: 0000000000000007
   RBP: 00007f1ac6a01b08 R08: 0000000000000000 R09: 0000000000000000
   R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
   R13: 0000000000000001 R14: 00007f1ac6b76160 R15: 00007fffda145a88
    </TASK>

Reported-by: syzbot+63913e558c084f7f8fdc@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/677b3014.050a0220.3b53b0.0064.GAE@google.com/
Fixes: 99785998ed1c ("btrfs: reduce lock contention when eb cache miss for btree search")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 92071ca0655f0..3dc5a35dd19b3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 
 		if (!p->skip_locking) {
 			btrfs_unlock_up_safe(p, parent_level + 1);
+			btrfs_maybe_reset_lockdep_class(root, tmp);
 			tmp_locked = true;
 			btrfs_tree_read_lock(tmp);
 			btrfs_release_path(p);
@@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 
 	if (!p->skip_locking) {
 		ASSERT(ret == -EAGAIN);
+		btrfs_maybe_reset_lockdep_class(root, tmp);
 		tmp_locked = true;
 		btrfs_tree_read_lock(tmp);
 		btrfs_release_path(p);

From 0d85f5c2dd91df6b5da454406756f463ba923b69 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 13 Jan 2025 15:01:08 +0000
Subject: [PATCH 183/368] btrfs: fix assertion failure when splitting ordered
 extent after transaction abort

If while we are doing a direct IO write a transaction abort happens, we
mark all existing ordered extents with the BTRFS_ORDERED_IOERR flag (done
at btrfs_destroy_ordered_extents()), and then after that if we enter
btrfs_split_ordered_extent() and the ordered extent has bytes left
(meaning we have a bio that doesn't cover the whole ordered extent, see
details at btrfs_extract_ordered_extent()), we will fail on the following
assertion at btrfs_split_ordered_extent():

   ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));

because the BTRFS_ORDERED_IOERR flag is set and the definition of
BTRFS_ORDERED_TYPE_FLAGS is just the union of all flags that identify the
type of write (regular, nocow, prealloc, compressed, direct IO, encoded).

Fix this by returning an error from btrfs_extract_ordered_extent() if we
find the BTRFS_ORDERED_IOERR flag in the ordered extent. The error will
be the error that resulted in the transaction abort or -EIO if no
transaction abort happened.

This was recently reported by syzbot with the following trace:

   FAULT_INJECTION: forcing a failure.
   name failslab, interval 1, probability 0, space 0, times 1
   CPU: 0 UID: 0 PID: 5321 Comm: syz.0.0 Not tainted 6.13.0-rc5-syzkaller #0
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
   Call Trace:
    <TASK>
    __dump_stack lib/dump_stack.c:94 [inline]
    dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
    fail_dump lib/fault-inject.c:53 [inline]
    should_fail_ex+0x3b0/0x4e0 lib/fault-inject.c:154
    should_failslab+0xac/0x100 mm/failslab.c:46
    slab_pre_alloc_hook mm/slub.c:4072 [inline]
    slab_alloc_node mm/slub.c:4148 [inline]
    __do_kmalloc_node mm/slub.c:4297 [inline]
    __kmalloc_noprof+0xdd/0x4c0 mm/slub.c:4310
    kmalloc_noprof include/linux/slab.h:905 [inline]
    kzalloc_noprof include/linux/slab.h:1037 [inline]
    btrfs_chunk_alloc_add_chunk_item+0x244/0x1100 fs/btrfs/volumes.c:5742
    reserve_chunk_space+0x1ca/0x2c0 fs/btrfs/block-group.c:4292
    check_system_chunk fs/btrfs/block-group.c:4319 [inline]
    do_chunk_alloc fs/btrfs/block-group.c:3891 [inline]
    btrfs_chunk_alloc+0x77b/0xf80 fs/btrfs/block-group.c:4187
    find_free_extent_update_loop fs/btrfs/extent-tree.c:4166 [inline]
    find_free_extent+0x42d1/0x5810 fs/btrfs/extent-tree.c:4579
    btrfs_reserve_extent+0x422/0x810 fs/btrfs/extent-tree.c:4672
    btrfs_new_extent_direct fs/btrfs/direct-io.c:186 [inline]
    btrfs_get_blocks_direct_write+0x706/0xfa0 fs/btrfs/direct-io.c:321
    btrfs_dio_iomap_begin+0xbb7/0x1180 fs/btrfs/direct-io.c:525
    iomap_iter+0x697/0xf60 fs/iomap/iter.c:90
    __iomap_dio_rw+0xeb9/0x25b0 fs/iomap/direct-io.c:702
    btrfs_dio_write fs/btrfs/direct-io.c:775 [inline]
    btrfs_direct_write+0x610/0xa30 fs/btrfs/direct-io.c:880
    btrfs_do_write_iter+0x2a0/0x760 fs/btrfs/file.c:1397
    do_iter_readv_writev+0x600/0x880
    vfs_writev+0x376/0xba0 fs/read_write.c:1050
    do_pwritev fs/read_write.c:1146 [inline]
    __do_sys_pwritev2 fs/read_write.c:1204 [inline]
    __se_sys_pwritev2+0x196/0x2b0 fs/read_write.c:1195
    do_syscall_x64 arch/x86/entry/common.c:52 [inline]
    do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
    entry_SYSCALL_64_after_hwframe+0x77/0x7f
   RIP: 0033:0x7f1281f85d29
   RSP: 002b:00007f12819fe038 EFLAGS: 00000246 ORIG_RAX: 0000000000000148
   RAX: ffffffffffffffda RBX: 00007f1282176080 RCX: 00007f1281f85d29
   RDX: 0000000000000001 RSI: 0000000020000240 RDI: 0000000000000005
   RBP: 00007f12819fe090 R08: 0000000000000000 R09: 0000000000000003
   R10: 0000000000007000 R11: 0000000000000246 R12: 0000000000000002
   R13: 0000000000000000 R14: 00007f1282176080 R15: 00007ffcb9e23328
    </TASK>
   BTRFS error (device loop0 state A): Transaction aborted (error -12)
   BTRFS: error (device loop0 state A) in btrfs_chunk_alloc_add_chunk_item:5745: errno=-12 Out of memory
   BTRFS info (device loop0 state EA): forced readonly
   assertion failed: !(flags & ~BTRFS_ORDERED_TYPE_FLAGS), in fs/btrfs/ordered-data.c:1234
   ------------[ cut here ]------------
   kernel BUG at fs/btrfs/ordered-data.c:1234!
   Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI
   CPU: 0 UID: 0 PID: 5321 Comm: syz.0.0 Not tainted 6.13.0-rc5-syzkaller #0
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
   RIP: 0010:btrfs_split_ordered_extent+0xd8d/0xe20 fs/btrfs/ordered-data.c:1234
   RSP: 0018:ffffc9000d1df2b8 EFLAGS: 00010246
   RAX: 0000000000000057 RBX: 000000000006a000 RCX: 9ce21886c4195300
   RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000
   RBP: 0000000000000091 R08: ffffffff817f0a3c R09: 1ffff92001a3bdf4
   R10: dffffc0000000000 R11: fffff52001a3bdf5 R12: 1ffff1100a45f401
   R13: ffff8880522fa018 R14: dffffc0000000000 R15: 000000000006a000
   FS:  00007f12819fe6c0(0000) GS:ffff88801fc00000(0000) knlGS:0000000000000000
   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
   CR2: 0000557750bd7da8 CR3: 00000000400ea000 CR4: 0000000000352ef0
   DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
   DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
   Call Trace:
    <TASK>
    btrfs_extract_ordered_extent fs/btrfs/direct-io.c:702 [inline]
    btrfs_dio_submit_io+0x4be/0x6d0 fs/btrfs/direct-io.c:737
    iomap_dio_submit_bio fs/iomap/direct-io.c:85 [inline]
    iomap_dio_bio_iter+0x1022/0x1740 fs/iomap/direct-io.c:447
    __iomap_dio_rw+0x13b7/0x25b0 fs/iomap/direct-io.c:703
    btrfs_dio_write fs/btrfs/direct-io.c:775 [inline]
    btrfs_direct_write+0x610/0xa30 fs/btrfs/direct-io.c:880
    btrfs_do_write_iter+0x2a0/0x760 fs/btrfs/file.c:1397
    do_iter_readv_writev+0x600/0x880
    vfs_writev+0x376/0xba0 fs/read_write.c:1050
    do_pwritev fs/read_write.c:1146 [inline]
    __do_sys_pwritev2 fs/read_write.c:1204 [inline]
    __se_sys_pwritev2+0x196/0x2b0 fs/read_write.c:1195
    do_syscall_x64 arch/x86/entry/common.c:52 [inline]
    do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
    entry_SYSCALL_64_after_hwframe+0x77/0x7f
   RIP: 0033:0x7f1281f85d29
   RSP: 002b:00007f12819fe038 EFLAGS: 00000246 ORIG_RAX: 0000000000000148
   RAX: ffffffffffffffda RBX: 00007f1282176080 RCX: 00007f1281f85d29
   RDX: 0000000000000001 RSI: 0000000020000240 RDI: 0000000000000005
   RBP: 00007f12819fe090 R08: 0000000000000000 R09: 0000000000000003
   R10: 0000000000007000 R11: 0000000000000246 R12: 0000000000000002
   R13: 0000000000000000 R14: 00007f1282176080 R15: 00007ffcb9e23328
    </TASK>
   Modules linked in:
   ---[ end trace 0000000000000000 ]---
   RIP: 0010:btrfs_split_ordered_extent+0xd8d/0xe20 fs/btrfs/ordered-data.c:1234
   RSP: 0018:ffffc9000d1df2b8 EFLAGS: 00010246
   RAX: 0000000000000057 RBX: 000000000006a000 RCX: 9ce21886c4195300
   RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000
   RBP: 0000000000000091 R08: ffffffff817f0a3c R09: 1ffff92001a3bdf4
   R10: dffffc0000000000 R11: fffff52001a3bdf5 R12: 1ffff1100a45f401
   R13: ffff8880522fa018 R14: dffffc0000000000 R15: 000000000006a000
   FS:  00007f12819fe6c0(0000) GS:ffff88801fc00000(0000) knlGS:0000000000000000
   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
   CR2: 0000557750bd7da8 CR3: 00000000400ea000 CR4: 0000000000352ef0
   DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
   DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

In this case the transaction abort was due to (an injected) memory
allocation failure when attempting to allocate a new chunk.

Reported-by: syzbot+f60d8337a5c8e8d92a77@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/6777f2dd.050a0220.178762.0045.GAE@google.com/
Fixes: 52b1fdca23ac ("btrfs: handle completed ordered extents in btrfs_split_ordered_extent")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 30eceaf829a7e..4aca7475fd82c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
 	 */
 	if (WARN_ON_ONCE(len >= ordered->num_bytes))
 		return ERR_PTR(-EINVAL);
+	/*
+	 * If our ordered extent had an error there's no point in continuing.
+	 * The error may have come from a transaction abort done either by this
+	 * task or some other concurrent task, and the transaction abort path
+	 * iterates over all existing ordered extents and sets the flag
+	 * BTRFS_ORDERED_IOERR on them.
+	 */
+	if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) {
+		const int fs_error = BTRFS_FS_ERROR(fs_info);
+
+		return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO);
+	}
 	/* We cannot split partially completed ordered extents. */
 	if (ordered->bytes_left) {
 		ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));

From c9c863793395cf0a66c2778a29d72c48c02fbb66 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 20 Jan 2025 09:40:43 +1030
Subject: [PATCH 184/368] btrfs: do not output error message if a qgroup has
 been already cleaned up

[BUG]
There is a bug report that btrfs outputs the following error message:

  BTRFS info (device nvme0n1p2): qgroup scan completed (inconsistency flag cleared)
  BTRFS warning (device nvme0n1p2): failed to cleanup qgroup 0/1179: -2

[CAUSE]
The error itself is pretty harmless, and the end user should ignore it.

When a subvolume is fully dropped, btrfs will call
btrfs_qgroup_cleanup_dropped_subvolume() to delete the qgroup.

However, if a qgroup rescan happened before the subvolume was fully dropped,
the qgroup for that subvolume will not be re-created, as rescan will only
create a new qgroup if a BTRFS_ROOT_REF_KEY is found.

But before we drop a subvolume, the subvolume is unlinked thus there is no
BTRFS_ROOT_REF_KEY.

In that case, btrfs_remove_qgroup() will fail with -ENOENT and trigger
the above error message.

[FIX]
Just ignore -ENOENT error from btrfs_remove_qgroup() inside
btrfs_qgroup_cleanup_dropped_subvolume().

Reported-by: John Shand <jshand2013@gmail.com>
Link: https://bugzilla.suse.com/show_bug.cgi?id=1236056
Fixes: 839d6ea4f86d ("btrfs: automatically remove the subvolume qgroup")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b90fabe302e61..aaf16019d829a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1897,8 +1897,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su
 	/*
 	 * It's squota and the subvolume still has numbers needed for future
 	 * accounting, in this case we can not delete it.  Just skip it.
+	 *
+	 * Or the qgroup is already removed by a qgroup rescan. For both cases we're
+	 * safe to ignore them.
 	 */
-	if (ret == -EBUSY)
+	if (ret == -EBUSY || ret == -ENOENT)
 		ret = 0;
 	return ret;
 }

From e2f0943cf37305dbdeaf9846e3c941451bcdef63 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 20 Jan 2025 17:26:10 +0000
Subject: [PATCH 185/368] btrfs: fix use-after-free when attempting to join an
 aborted transaction

When we are trying to join the current transaction and if it's aborted,
we read its 'aborted' field after unlocking fs_info->trans_lock and
without holding any extra reference count on it. This means that a
concurrent task that is aborting the transaction may free the transaction
before we read its 'aborted' field, leading to a use-after-free.

Fix this by reading the 'aborted' field while holding fs_info->trans_lock
since any freeing task must first acquire that lock and set
fs_info->running_transaction to NULL before freeing the transaction.

This was reported by syzbot and Dmitry with the following stack traces
from KASAN:

   ==================================================================
   BUG: KASAN: slab-use-after-free in join_transaction+0xd9b/0xda0 fs/btrfs/transaction.c:278
   Read of size 4 at addr ffff888011839024 by task kworker/u4:9/1128

   CPU: 0 UID: 0 PID: 1128 Comm: kworker/u4:9 Not tainted 6.13.0-rc7-syzkaller-00019-gc45323b7560e #0
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
   Workqueue: events_unbound btrfs_async_reclaim_data_space
   Call Trace:
    <TASK>
    __dump_stack lib/dump_stack.c:94 [inline]
    dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
    print_address_description mm/kasan/report.c:378 [inline]
    print_report+0x169/0x550 mm/kasan/report.c:489
    kasan_report+0x143/0x180 mm/kasan/report.c:602
    join_transaction+0xd9b/0xda0 fs/btrfs/transaction.c:278
    start_transaction+0xaf8/0x1670 fs/btrfs/transaction.c:697
    flush_space+0x448/0xcf0 fs/btrfs/space-info.c:803
    btrfs_async_reclaim_data_space+0x159/0x510 fs/btrfs/space-info.c:1321
    process_one_work kernel/workqueue.c:3236 [inline]
    process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3317
    worker_thread+0x870/0xd30 kernel/workqueue.c:3398
    kthread+0x2f0/0x390 kernel/kthread.c:389
    ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
    ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
    </TASK>

   Allocated by task 5315:
    kasan_save_stack mm/kasan/common.c:47 [inline]
    kasan_save_track+0x3f/0x80 mm/kasan/common.c:68
    poison_kmalloc_redzone mm/kasan/common.c:377 [inline]
    __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:394
    kasan_kmalloc include/linux/kasan.h:260 [inline]
    __kmalloc_cache_noprof+0x243/0x390 mm/slub.c:4329
    kmalloc_noprof include/linux/slab.h:901 [inline]
    join_transaction+0x144/0xda0 fs/btrfs/transaction.c:308
    start_transaction+0xaf8/0x1670 fs/btrfs/transaction.c:697
    btrfs_create_common+0x1b2/0x2e0 fs/btrfs/inode.c:6572
    lookup_open fs/namei.c:3649 [inline]
    open_last_lookups fs/namei.c:3748 [inline]
    path_openat+0x1c03/0x3590 fs/namei.c:3984
    do_filp_open+0x27f/0x4e0 fs/namei.c:4014
    do_sys_openat2+0x13e/0x1d0 fs/open.c:1402
    do_sys_open fs/open.c:1417 [inline]
    __do_sys_creat fs/open.c:1495 [inline]
    __se_sys_creat fs/open.c:1489 [inline]
    __x64_sys_creat+0x123/0x170 fs/open.c:1489
    do_syscall_x64 arch/x86/entry/common.c:52 [inline]
    do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
    entry_SYSCALL_64_after_hwframe+0x77/0x7f

   Freed by task 5336:
    kasan_save_stack mm/kasan/common.c:47 [inline]
    kasan_save_track+0x3f/0x80 mm/kasan/common.c:68
    kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:582
    poison_slab_object mm/kasan/common.c:247 [inline]
    __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264
    kasan_slab_free include/linux/kasan.h:233 [inline]
    slab_free_hook mm/slub.c:2353 [inline]
    slab_free mm/slub.c:4613 [inline]
    kfree+0x196/0x430 mm/slub.c:4761
    cleanup_transaction fs/btrfs/transaction.c:2063 [inline]
    btrfs_commit_transaction+0x2c97/0x3720 fs/btrfs/transaction.c:2598
    insert_balance_item+0x1284/0x20b0 fs/btrfs/volumes.c:3757
    btrfs_balance+0x992/0x10c0 fs/btrfs/volumes.c:4633
    btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670
    vfs_ioctl fs/ioctl.c:51 [inline]
    __do_sys_ioctl fs/ioctl.c:906 [inline]
    __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892
    do_syscall_x64 arch/x86/entry/common.c:52 [inline]
    do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
    entry_SYSCALL_64_after_hwframe+0x77/0x7f

   The buggy address belongs to the object at ffff888011839000
    which belongs to the cache kmalloc-2k of size 2048
   The buggy address is located 36 bytes inside of
    freed 2048-byte region [ffff888011839000, ffff888011839800)

   The buggy address belongs to the physical page:
   page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11838
   head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
   flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff)
   page_type: f5(slab)
   raw: 00fff00000000040 ffff88801ac42000 ffffea0000493400 dead000000000002
   raw: 0000000000000000 0000000000080008 00000001f5000000 0000000000000000
   head: 00fff00000000040 ffff88801ac42000 ffffea0000493400 dead000000000002
   head: 0000000000000000 0000000000080008 00000001f5000000 0000000000000000
   head: 00fff00000000003 ffffea0000460e01 ffffffffffffffff 0000000000000000
   head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000
   page dumped because: kasan: bad access detected
   page_owner tracks the page as allocated
   page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 57, tgid 57 (kworker/0:2), ts 67248182943, free_ts 67229742023
    set_page_owner include/linux/page_owner.h:32 [inline]
    post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1558
    prep_new_page mm/page_alloc.c:1566 [inline]
    get_page_from_freelist+0x365c/0x37a0 mm/page_alloc.c:3476
    __alloc_pages_noprof+0x292/0x710 mm/page_alloc.c:4753
    alloc_pages_mpol_noprof+0x3e1/0x780 mm/mempolicy.c:2269
    alloc_slab_page+0x6a/0x110 mm/slub.c:2423
    allocate_slab+0x5a/0x2b0 mm/slub.c:2589
    new_slab mm/slub.c:2642 [inline]
    ___slab_alloc+0xc27/0x14a0 mm/slub.c:3830
    __slab_alloc+0x58/0xa0 mm/slub.c:3920
    __slab_alloc_node mm/slub.c:3995 [inline]
    slab_alloc_node mm/slub.c:4156 [inline]
    __do_kmalloc_node mm/slub.c:4297 [inline]
    __kmalloc_node_track_caller_noprof+0x2e9/0x4c0 mm/slub.c:4317
    kmalloc_reserve+0x111/0x2a0 net/core/skbuff.c:609
    __alloc_skb+0x1f3/0x440 net/core/skbuff.c:678
    alloc_skb include/linux/skbuff.h:1323 [inline]
    alloc_skb_with_frags+0xc3/0x820 net/core/skbuff.c:6612
    sock_alloc_send_pskb+0x91a/0xa60 net/core/sock.c:2884
    sock_alloc_send_skb include/net/sock.h:1803 [inline]
    mld_newpack+0x1c3/0xaf0 net/ipv6/mcast.c:1747
    add_grhead net/ipv6/mcast.c:1850 [inline]
    add_grec+0x1492/0x19a0 net/ipv6/mcast.c:1988
    mld_send_cr net/ipv6/mcast.c:2114 [inline]
    mld_ifc_work+0x691/0xd90 net/ipv6/mcast.c:2651
   page last free pid 5300 tgid 5300 stack trace:
    reset_page_owner include/linux/page_owner.h:25 [inline]
    free_pages_prepare mm/page_alloc.c:1127 [inline]
    free_unref_page+0xd3f/0x1010 mm/page_alloc.c:2659
    __slab_free+0x2c2/0x380 mm/slub.c:4524
    qlink_free mm/kasan/quarantine.c:163 [inline]
    qlist_free_all+0x9a/0x140 mm/kasan/quarantine.c:179
    kasan_quarantine_reduce+0x14f/0x170 mm/kasan/quarantine.c:286
    __kasan_slab_alloc+0x23/0x80 mm/kasan/common.c:329
    kasan_slab_alloc include/linux/kasan.h:250 [inline]
    slab_post_alloc_hook mm/slub.c:4119 [inline]
    slab_alloc_node mm/slub.c:4168 [inline]
    __do_kmalloc_node mm/slub.c:4297 [inline]
    __kmalloc_noprof+0x236/0x4c0 mm/slub.c:4310
    kmalloc_noprof include/linux/slab.h:905 [inline]
    kzalloc_noprof include/linux/slab.h:1037 [inline]
    fib_create_info+0xc14/0x25b0 net/ipv4/fib_semantics.c:1435
    fib_table_insert+0x1f6/0x1f20 net/ipv4/fib_trie.c:1231
    fib_magic+0x3d8/0x620 net/ipv4/fib_frontend.c:1112
    fib_add_ifaddr+0x40c/0x5e0 net/ipv4/fib_frontend.c:1156
    fib_netdev_event+0x375/0x490 net/ipv4/fib_frontend.c:1494
    notifier_call_chain+0x1a5/0x3f0 kernel/notifier.c:85
    __dev_notify_flags+0x207/0x400
    dev_change_flags+0xf0/0x1a0 net/core/dev.c:9045
    do_setlink+0xc90/0x4210 net/core/rtnetlink.c:3109
    rtnl_changelink net/core/rtnetlink.c:3723 [inline]
    __rtnl_newlink net/core/rtnetlink.c:3875 [inline]
    rtnl_newlink+0x1bb6/0x2210 net/core/rtnetlink.c:4012

   Memory state around the buggy address:
    ffff888011838f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
    ffff888011838f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
   >ffff888011839000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                  ^
    ffff888011839080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
    ffff888011839100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
   ==================================================================

Reported-by: syzbot+45212e9d87a98c3f5b42@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/678e7da5.050a0220.303755.007c.GAE@google.com/
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Link: https://lore.kernel.org/linux-btrfs/CACT4Y+ZFBdo7pT8L2AzM=vegZwjp-wNkVJZQf0Ta3vZqtExaSw@mail.gmail.com/
Fixes: 871383be592b ("btrfs: add missing unlocks to transaction abort paths")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 15312013f2a34..aca83a98b75a2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -274,8 +274,10 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
 	cur_trans = fs_info->running_transaction;
 	if (cur_trans) {
 		if (TRANS_ABORTED(cur_trans)) {
+			const int abort_error = cur_trans->aborted;
+
 			spin_unlock(&fs_info->trans_lock);
-			return cur_trans->aborted;
+			return abort_error;
 		}
 		if (btrfs_blocked_trans_types[cur_trans->state] & type) {
 			spin_unlock(&fs_info->trans_lock);

From fdef89ce6fada462aef9cb90a140c93c8c209f0f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 21 Jan 2025 12:24:39 +0000
Subject: [PATCH 186/368] btrfs: avoid starting new transaction when cleaning
 qgroup during subvolume drop

At btrfs_qgroup_cleanup_dropped_subvolume() all we want is to commit the
current transaction in order to have all the qgroup rfer/excl numbers up
to date. However we are using btrfs_start_transaction(), which joins the
current transaction if there is one that is not yet committing, but also
starts a new one if there is none or if the current one is already
committing (its state is >= TRANS_STATE_COMMIT_START). This latter case
results in unnecessary IO, wasting time and a pointless rotation of the
backup roots in the super block.

So instead of using btrfs_start_transaction() followed by a
btrfs_commit_transaction(), use btrfs_commit_current_transaction() which
achieves our purpose and avoids starting and committing new transactions.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aaf16019d829a..f9d3766c809b4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su
 	 * Commit current transaction to make sure all the rfer/excl numbers
 	 * get updated.
 	 */
-	trans = btrfs_start_transaction(fs_info->quota_root, 0);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
-
-	ret = btrfs_commit_transaction(trans);
+	ret = btrfs_commit_current_transaction(fs_info->quota_root);
 	if (ret < 0)
 		return ret;
 

From 013eb043f37bd87c4d60d51034401a5a6d105bcf Mon Sep 17 00:00:00 2001
From: Howard Chu <howardchu95@gmail.com>
Date: Thu, 12 Dec 2024 18:30:47 -0800
Subject: [PATCH 187/368] perf trace: Fix BPF loading failure (-E2BIG)

As reported by Namhyung Kim and acknowledged by Qiao Zhao (link:
https://lore.kernel.org/linux-perf-users/20241206001436.1947528-1-namhyung@kernel.org/),
on certain machines, perf trace failed to load the BPF program into the
kernel. The verifier runs perf trace's BPF program for up to 1 million
instructions, returning an E2BIG error, whereas the perf trace BPF
program should be much less complex than that. This patch aims to fix
the issue described above.

The E2BIG problem from clang-15 to clang-16 is caused by this line:
 } else if (size < 0 && size >= -6) { /* buffer */

Specifically this check: size < 0. It seems like clang generates a cool
optimization for this sign check that breaks things.

Making 'size' s64, and using
 } else if ((int)size < 0 && size >= -6) { /* buffer */

solves the problem. This is some Hogwarts magic.

And the unbounded access of clang-12 and clang-14 (clang-13 works this
time) is fixed by making variable 'aug_size' s64.

As for this:
-if (aug_size > TRACE_AUG_MAX_BUF)
-	aug_size = TRACE_AUG_MAX_BUF;
+aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];

This makes the BPF skel generated by clang-18 work. Yes, new clangs
introduce problems too.

Sorry, I only know that it works, but I don't know how it works. I'm not
an expert in the BPF verifier. I really hope this is not a kernel
version issue, as that would make the test case (kernel_nr) *
(clang_nr), a true horror story. I will test it on more kernel versions
in the future.

Fixes: 395d38419f18: ("perf trace augmented_raw_syscalls: Add more check s to pass the verifier")
Reported-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Howard Chu <howardchu95@gmail.com>
Tested-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241213023047.541218-1-howardchu95@gmail.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 4a62ed593e84e..e4352881e3faa 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
 static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 {
 	bool augmented, do_output = false;
-	int zero = 0, size, aug_size, index,
-	    value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+	int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
 	u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
+	s64 aug_size, size;
 	unsigned int nr, *beauty_map;
 	struct beauty_payload_enter *payload;
 	void *arg, *payload_offset;
@@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 		} else if (size > 0 && size <= value_size) { /* struct */
 			if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
 				augmented = true;
-		} else if (size < 0 && size >= -6) { /* buffer */
+		} else if ((int)size < 0 && size >= -6) { /* buffer */
 			index = -(size + 1);
 			barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
 			index &= 7;	    // Satisfy the bounds checking with the verifier in some kernels.
-			aug_size = args->args[index];
-
-			if (aug_size > TRACE_AUG_MAX_BUF)
-				aug_size = TRACE_AUG_MAX_BUF;
+			aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
 
 			if (aug_size > 0) {
 				if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))

From 915175b49f65d9edeb81659e82cbb27b621dbc17 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng@gmail.com>
Date: Wed, 15 Jan 2025 20:35:25 +0800
Subject: [PATCH 188/368] xfs: fix the entry condition of exact EOF block
 allocation optimization

When we call create(), lseek() and write() sequentially, offset != 0
cannot be used as a judgment condition for whether the file already
has extents.

Furthermore, when xfs_bmap_adjacent() has not given a better blkno,
it is not necessary to use exact EOF block allocation.

Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/libxfs/xfs_bmap.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 40ad22fb808b9..0ef19f1469ec9 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof(
 	int			error;
 
 	/*
-	 * If there are already extents in the file, try an exact EOF block
-	 * allocation to extend the file as a contiguous extent. If that fails,
-	 * or it's the first allocation in a file, just try for a stripe aligned
-	 * allocation.
+	 * If there are already extents in the file, and xfs_bmap_adjacent() has
+	 * given a better blkno, try an exact EOF block allocation to extend the
+	 * file as a contiguous extent. If that fails, or it's the first
+	 * allocation in a file, just try for a stripe aligned allocation.
 	 */
-	if (ap->offset) {
+	if (ap->eof) {
 		xfs_extlen_t	nextminlen = 0;
 
 		/*
@@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length(
 	int			error;
 
 	ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
-	xfs_bmap_adjacent(ap);
+	if (!xfs_bmap_adjacent(ap))
+		ap->eof = false;
 
 	/*
 	 * Search for an allocation group with a single extent large enough for

From 89841b23809f5fb12cbead142204064739fef25a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 16 Jan 2025 07:03:35 +0100
Subject: [PATCH 189/368] xfs: remove an out of date comment in _xfs_buf_alloc

There hasn't been anything like an io_length for a long time.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_buf.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7fbdd4b30676c..f1252ed8bd0a7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -232,11 +232,6 @@ _xfs_buf_alloc(
 	bp->b_mount = target->bt_mount;
 	bp->b_flags = flags;
 
-	/*
-	 * Set length and io_length to the same value initially.
-	 * I/O routines should use io_length, which will be the same in
-	 * most cases but may be reset (e.g. XFS recovery).
-	 */
 	error = xfs_buf_get_maps(bp, nmaps);
 	if (error)  {
 		kmem_cache_free(xfs_buf_cache, bp);

From f5f0ed89f13e3e5246404a322ee85169a226bfb5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Jan 2025 06:43:21 +0100
Subject: [PATCH 190/368] xfs: don't call remap_verify_area with sb write
 protection held

The XFS_IOC_EXCHANGE_RANGE ioctl with the XFS_EXCHANGE_RANGE_TO_EOF flag
operates on a range bounded by the end of the file.  This means the
actual amount of blocks exchanged is derived from the inode size, which
is only stable with the IOLOCK (i_rwsem) held.  To do that, it currently
calls remap_verify_area from inside the sb write protection which nests
outside the IOLOCK.  But this makes fsnotify_file_area_perm which is
called from remap_verify_area unhappy when the kernel is built with
lockdep and the recently added CONFIG_FANOTIFY_ACCESS_PERMISSIONS
option.

Fix this by always calling remap_verify_area before taking the write
protection, and passing a 0 size to remap_verify_area similar to
the FICLONE/FICLONERANGE ioctls when they are asked to clone until
the file end.

(Note: the size argument gets passed to fsnotify_file_area_perm, but
then isn't actually used there).

Fixes: 9a64d9b3109d ("xfs: introduce new file range exchange ioctl")
Cc: <stable@vger.kernel.org> # v6.10
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_exchrange.c | 71 ++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 44 deletions(-)

diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index f340a2015c4c7..0b41bdfecdfbc 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -329,22 +329,6 @@ xfs_exchrange_mappings(
  * successfully but before locks are dropped.
  */
 
-/* Verify that we have security clearance to perform this operation. */
-static int
-xfs_exchange_range_verify_area(
-	struct xfs_exchrange	*fxr)
-{
-	int			ret;
-
-	ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
-			true);
-	if (ret)
-		return ret;
-
-	return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
-			true);
-}
-
 /*
  * Performs necessary checks before doing a range exchange, having stabilized
  * mutable inode attributes via i_rwsem.
@@ -355,11 +339,13 @@ xfs_exchange_range_checks(
 	unsigned int		alloc_unit)
 {
 	struct inode		*inode1 = file_inode(fxr->file1);
+	loff_t			size1 = i_size_read(inode1);
 	struct inode		*inode2 = file_inode(fxr->file2);
+	loff_t			size2 = i_size_read(inode2);
 	uint64_t		allocmask = alloc_unit - 1;
 	int64_t			test_len;
 	uint64_t		blen;
-	loff_t			size1, size2, tmp;
+	loff_t			tmp;
 	int			error;
 
 	/* Don't touch certain kinds of inodes */
@@ -368,24 +354,25 @@ xfs_exchange_range_checks(
 	if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
 		return -ETXTBSY;
 
-	size1 = i_size_read(inode1);
-	size2 = i_size_read(inode2);
-
 	/* Ranges cannot start after EOF. */
 	if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
 		return -EINVAL;
 
-	/*
-	 * If the caller said to exchange to EOF, we set the length of the
-	 * request large enough to cover everything to the end of both files.
-	 */
 	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
+		/*
+		 * If the caller said to exchange to EOF, we set the length of
+		 * the request large enough to cover everything to the end of
+		 * both files.
+		 */
 		fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
 					     size2 - fxr->file2_offset);
-
-		error = xfs_exchange_range_verify_area(fxr);
-		if (error)
-			return error;
+	} else {
+		/*
+		 * Otherwise we require both ranges to end within EOF.
+		 */
+		if (fxr->file1_offset + fxr->length > size1 ||
+		    fxr->file2_offset + fxr->length > size2)
+			return -EINVAL;
 	}
 
 	/*
@@ -401,15 +388,6 @@ xfs_exchange_range_checks(
 	    check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
 		return -EINVAL;
 
-	/*
-	 * We require both ranges to end within EOF, unless we're exchanging
-	 * to EOF.
-	 */
-	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
-	    (fxr->file1_offset + fxr->length > size1 ||
-	     fxr->file2_offset + fxr->length > size2))
-		return -EINVAL;
-
 	/*
 	 * Make sure we don't hit any file size limits.  If we hit any size
 	 * limits such that test_length was adjusted, we abort the whole
@@ -747,6 +725,7 @@ xfs_exchange_range(
 {
 	struct inode		*inode1 = file_inode(fxr->file1);
 	struct inode		*inode2 = file_inode(fxr->file2);
+	loff_t			check_len = fxr->length;
 	int			ret;
 
 	BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
@@ -779,14 +758,18 @@ xfs_exchange_range(
 		return -EBADF;
 
 	/*
-	 * If we're not exchanging to EOF, we can check the areas before
-	 * stabilizing both files' i_size.
+	 * If we're exchanging to EOF we can't calculate the length until taking
+	 * the iolock.  Pass a 0 length to remap_verify_area similar to the
+	 * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
 	 */
-	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
-		ret = xfs_exchange_range_verify_area(fxr);
-		if (ret)
-			return ret;
-	}
+	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
+		check_len = 0;
+	ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
+	if (ret)
+		return ret;
+	ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
+	if (ret)
+		return ret;
 
 	/* Update cmtime if the fd/inode don't forbid it. */
 	if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))

From 2e3c688ddaf2bb8e3696a773b5278711a90ea080 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 24 Jan 2025 09:52:45 +0100
Subject: [PATCH 191/368] ASoC: renesas: SND_SIU_MIGOR should depend on
 DMADEVICES

If CONFIG_DMADEVICES=n:

    WARNING: unmet direct dependencies detected for SND_SOC_SH4_SIU
      Depends on [n]: SOUND [=y] && SND [=y] && SND_SOC [=y] && (SUPERH [=y] || ARCH_RENESAS || COMPILE_TEST [=n]) && ARCH_SHMOBILE [=y] && HAVE_CLK [=y] && DMADEVICES [=n]
      Selected by [y]:
      - SND_SIU_MIGOR [=y] && SOUND [=y] && SND [=y] && SND_SOC [=y] && (SUPERH [=y] || ARCH_RENESAS || COMPILE_TEST [=n]) && SH_MIGOR [=y] && I2C [=y]

SND_SIU_MIGOR selects SND_SOC_SH4_SIU.  As the latter depends on
DMADEVICES, the former should depend on DMADEVICES, too.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501241032.oOmsmzvk-lkp@intel.com/
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/8c17ff52584ce824b8b42d08ea1b942ebeb7f4d9.1737708688.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/renesas/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/renesas/Kconfig b/sound/soc/renesas/Kconfig
index 426632996a0a3..cb01fb36355f0 100644
--- a/sound/soc/renesas/Kconfig
+++ b/sound/soc/renesas/Kconfig
@@ -67,7 +67,7 @@ config SND_SH7760_AC97
 
 config SND_SIU_MIGOR
 	tristate "SIU sound support on Migo-R"
-	depends on SH_MIGOR && I2C
+	depends on SH_MIGOR && I2C && DMADEVICES
 	select SND_SOC_SH4_SIU
 	select SND_SOC_WM8978
 	help

From 0141978ae75bd48bac13fca6de131a5071c32011 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 17 Jan 2025 16:14:20 +0800
Subject: [PATCH 192/368] x86/acpi: Fix LAPIC/x2APIC parsing order

On some systems, the same CPU (with the same APIC ID) is assigned a
different logical CPU id after commit ec9aedb2aa1a ("x86/acpi: Ignore
invalid x2APIC entries").

This means that Linux enumerates the CPUs in a different order, which
violates ACPI specification[1] that states:

  "OSPM should initialize processors in the order that they appear in
   the MADT"

The problematic commit parses all LAPIC entries before any x2APIC
entries, aiming to ignore x2APIC entries with APIC ID < 255 when valid
LAPIC entries exist. However, it disrupts the CPU enumeration order on
systems where x2APIC entries precede LAPIC entries in the MADT.

Fix this problem by:

 1) Parsing LAPIC entries first without registering them in the
    topology to evaluate whether valid LAPIC entries exist.

 2) Restoring the MADT in-order parser which invokes either the LAPIC
    or the X2APIC parser function depending on the entry type.

The X2APIC parser still ignores entries < 0xff in case that #1 found
valid LAPIC entries independent of their position in the MADT table.

Link: https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#madt-processor-local-apic-sapic-structure-entry-order
Cc: All applicable <stable@vger.kernel.org>
Reported-by: Jim Mattson <jmattson@google.com>
Closes: https://lore.kernel.org/all/20241010213136.668672-1-jmattson@google.com/
Fixes: ec9aedb2aa1a ("x86/acpi: Ignore invalid x2APIC entries")
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Tested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://patch.msgid.link/20250117081420.4046737-1-rui.zhang@intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 50 +++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3a44a9dc3fb7a..18485170d51b4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -226,6 +226,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
 	return 0;
 }
 
+static int __init
+acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end)
+{
+	struct acpi_madt_local_apic *processor = NULL;
+
+	processor = (struct acpi_madt_local_apic *)header;
+
+	if (BAD_MADT_ENTRY(processor, end))
+		return -EINVAL;
+
+	/* Ignore invalid ID */
+	if (processor->id == 0xff)
+		return 0;
+
+	/* Ignore processors that can not be onlined */
+	if (!acpi_is_processor_usable(processor->lapic_flags))
+		return 0;
+
+	has_lapic_cpus = true;
+	return 0;
+}
+
 static int __init
 acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
 {
@@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
 			       processor->processor_id, /* ACPI ID */
 			       processor->lapic_flags & ACPI_MADT_ENABLED);
 
-	has_lapic_cpus = true;
 	return 0;
 }
 
@@ -1029,6 +1050,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 static int __init acpi_parse_madt_lapic_entries(void)
 {
 	int count, x2count = 0;
+	struct acpi_subtable_proc madt_proc[2];
+	int ret;
 
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
@@ -1037,10 +1060,27 @@ static int __init acpi_parse_madt_lapic_entries(void)
 				      acpi_parse_sapic, MAX_LOCAL_APIC);
 
 	if (!count) {
-		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-					acpi_parse_lapic, MAX_LOCAL_APIC);
-		x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-					acpi_parse_x2apic, MAX_LOCAL_APIC);
+		/* Check if there are valid LAPIC entries */
+		acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC);
+
+		/*
+		 * Enumerate the APIC IDs in the order that they appear in the
+		 * MADT, no matter LAPIC entry or x2APIC entry is used.
+		 */
+		memset(madt_proc, 0, sizeof(madt_proc));
+		madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+		madt_proc[0].handler = acpi_parse_lapic;
+		madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+		madt_proc[1].handler = acpi_parse_x2apic;
+		ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
+				sizeof(struct acpi_table_madt),
+				madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+		if (ret < 0) {
+			pr_err("Error parsing LAPIC/X2APIC entries\n");
+			return ret;
+		}
+		count = madt_proc[0].count;
+		x2count = madt_proc[1].count;
 	}
 	if (!count && !x2count) {
 		pr_err("No LAPIC entries present\n");

From 8d28d0ddb986f56920ac97ae704cc3340a699a30 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Fri, 24 Jan 2025 17:20:55 +0800
Subject: [PATCH 193/368] md/md-bitmap: Synchronize bitmap_get_stats() with
 bitmap lifetime

After commit ec6bb299c7c3 ("md/md-bitmap: add 'sync_size' into struct
md_bitmap_stats"), the following panic is reported:

Oops: general protection fault, probably for non-canonical address
RIP: 0010:bitmap_get_stats+0x2b/0xa0
Call Trace:
 <TASK>
 md_seq_show+0x2d2/0x5b0
 seq_read_iter+0x2b9/0x470
 seq_read+0x12f/0x180
 proc_reg_read+0x57/0xb0
 vfs_read+0xf6/0x380
 ksys_read+0x6c/0xf0
 do_syscall_64+0x82/0x170
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

The root cause is that bitmap_get_stats() can be called at any time if mddev
is still there, even if the bitmap is destroyed or not fully initialized.
Dereferencing the bitmap in this case can crash the kernel. Meanwhile, the
above commit started dereferencing bitmap->storage, making the problem
easier to trigger.

Fix the problem by protecting bitmap_get_stats() with bitmap_info.mutex.

Cc: stable@vger.kernel.org # v6.12+
Fixes: 32a7627cf3a3 ("[PATCH] md: optimised resync using Bitmap based intent logging")
Reported-and-tested-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Closes: https://lore.kernel.org/linux-raid/ca3a91a2-50ae-4f68-b317-abd9889f3907@oracle.com/T/#m6e5086c95201135e4941fe38f9efa76daf9666c5
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250124092055.4050195-1-yukuai1@huaweicloud.com
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md-bitmap.c | 5 ++++-
 drivers/md/md.c        | 5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index ec4ecd96e6b14..23c09d22fcdbc 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2355,7 +2355,10 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
 
 	if (!bitmap)
 		return -ENOENT;
-
+	if (bitmap->mddev->bitmap_info.external)
+		return -ENOENT;
+	if (!bitmap->storage.sb_page) /* no superblock */
+		return -EINVAL;
 	sb = kmap_local_page(bitmap->storage.sb_page);
 	stats->sync_size = le64_to_cpu(sb->sync_size);
 	kunmap_local(sb);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 866015b681af8..465ca2af1e6ef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8376,6 +8376,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		return 0;
 
 	spin_unlock(&all_mddevs_lock);
+
+	/* prevent bitmap to be freed after checking */
+	mutex_lock(&mddev->bitmap_info.mutex);
+
 	spin_lock(&mddev->lock);
 	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
 		seq_printf(seq, "%s : ", mdname(mddev));
@@ -8451,6 +8455,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "\n");
 	}
 	spin_unlock(&mddev->lock);
+	mutex_unlock(&mddev->bitmap_info.mutex);
 	spin_lock(&all_mddevs_lock);
 
 	if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))

From a23ad06bfee5e51cd9e51aebf11401e7b4b5d00a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 24 Jan 2025 14:32:25 -0700
Subject: [PATCH 194/368] io_uring/register: use atomic_read/write for sq_flags
 migration

A previous commit changed all of the migration from the old to the new
ring for resizing to use READ/WRITE_ONCE. However, ->sq_flags is an
atomic_t, and while most archs won't complain on this, some will indeed
flag this:

io_uring/register.c:554:9: sparse: sparse: cast to non-scalar
io_uring/register.c:554:9: sparse: sparse: cast from non-scalar

Just use atomic_set/atomic_read for handling this case.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501242000.A2sKqaCL-lkp@intel.com/
Fixes: 2c5aae129f42 ("io_uring/register: document io_register_resize_rings() shared mem usage")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/register.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/register.c b/io_uring/register.c
index 0db181437ae33..9a4d2fbce4aec 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -552,7 +552,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	ctx->cqe_cached = ctx->cqe_sentinel = NULL;
 
 	WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
-	WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags));
+	atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
 	WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
 	WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));
 

From 0f3a822ae2254a1e7ce3a130a1efd94e2cab73ee Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sat, 25 Jan 2025 13:04:40 +0100
Subject: [PATCH 195/368] ALSA: hda/realtek: Fix quirk matching for Legion Pro
 7

The recent cleanup of the quirk table entries with the codec ID
matching caused a regression on some Lenovo Legion 7 models with PCI
SSID 17aa:386f: it wrongly assumed that the codec SSID on the machine
was also 17aa:386f, but in this case, it was 17aa:38a8.  This resulted
in binding with the wrong sub-codec: the Cirrus codec was bound instead
of TAS2781.

For addressing the regression, correct the quirk entry to the right
value 17aa:38a8.

Note that this makes the entry appear in an unsorted position.
This exception is needed because the entry must match before the PCI
SSID 17aa:386f.

Also there is another entry for 17aa:38a8, but the latter is for PCI
SSID matching while the new entry is for the codec SSID matching.

Fixes: 504f052aa343 ("ALSA: hda/realtek: Use codec SSID matching for Lenovo devices")
Reported-and-tested-by: Samantha Glocker <iam@anislandsomewhere.com>
Closes: https://lore.kernel.org/CAGPQRHYd48U__UKYj2jJnT4+dnNNoWRBi+wj6zPRn=JpNMBUrg@mail.gmail.com
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/20250125120519.16420-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d3c9ed9635888..d36a79a8ecbf8 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10918,7 +10918,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
 	HDA_CODEC_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2),
 	SND_PCI_QUIRK(0x17aa, 0x386e, "Yoga Pro 7 14ARP8", ALC285_FIXUP_SPEAKER2_TO_DAC1),
-	HDA_CODEC_QUIRK(0x17aa, 0x386f, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C),
+	HDA_CODEC_QUIRK(0x17aa, 0x38a8, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), /* this must match before PCI SSID 17aa:386f below */
 	SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
 	SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
 	SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),

From d85fc52cbb9a719c8335d93a28d6a79d7acd419f Mon Sep 17 00:00:00 2001
From: Lianqin Hu <hulianqin@vivo.com>
Date: Sun, 26 Jan 2025 03:51:11 +0000
Subject: [PATCH 196/368] ALSA: usb-audio: Add delay quirk for iBasso DC07 Pro

Audio control requests that set sampling frequency sometimes fail on
this card. Adding delay between control messages eliminates that problem.

usb 1-1: New USB device found, idVendor=2fc6, idProduct=f0b7
usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 1-1: Product: iBasso DC07 Pro
usb 1-1: Manufacturer: iBasso
usb 1-1: SerialNumber: CTUA171130B

Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/TYUPR06MB62174A48D04E09A37996DF84D2ED2@TYUPR06MB6217.apcprd06.prod.outlook.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 7968d6a2f592a..a97efb7b131ea 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2343,6 +2343,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
 		   QUIRK_FLAG_CTL_MSG_DELAY_1M),
 	DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */
 		   QUIRK_FLAG_CTL_MSG_DELAY_1M),
+	DEVICE_FLG(0x2fc6, 0xf0b7, /* iBasso DC07 Pro */
+		   QUIRK_FLAG_CTL_MSG_DELAY_1M),
 	DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */
 		   QUIRK_FLAG_IGNORE_CTL_ERROR),
 	DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */

From 5851a88dac1501353659690343b936d6fcb5d509 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Jan 2025 07:48:36 +0100
Subject: [PATCH 197/368] i2c: imx-lpi2c: select CONFIG_I2C_SLAVE

The addition of target mode causes a build failure when CONFIG_I2C_SLAVE
is turned off:

drivers/i2c/busses/i2c-imx-lpi2c.c:1273:10: error: 'const struct i2c_algorithm' has no member named 'reg_target'
 1273 |         .reg_target     = lpi2c_imx_register_target,
      |          ^~~~~~~~~~
drivers/i2c/busses/i2c-imx-lpi2c.c:1274:10: error: 'const struct i2c_algorithm' has no member named 'unreg_target'
 1274 |         .unreg_target   = lpi2c_imx_unregister_target,
      |          ^~~~~~~~~~~~

Select the Kconfig symbol like we do for other similar drivers.

Fixes: 1ee867e465c1 ("i2c: imx-lpi2c: add target mode support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Carlos Song <carlos.song@nxp.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 drivers/i2c/busses/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index eec95c724b25b..fc438f4457713 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -756,6 +756,7 @@ config I2C_IMX
 config I2C_IMX_LPI2C
 	tristate "IMX Low Power I2C interface"
 	depends on ARCH_MXC || COMPILE_TEST
+	select I2C_SLAVE
 	help
 	  Say Y here if you want to use the Low Power IIC bus controller
 	  on the Freescale i.MX processors.

From 6250ebe666e425e173df5e11e8a612d57921f48d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 25 Jan 2025 11:15:25 -0800
Subject: [PATCH 198/368] i2c: Fix core-managed per-client debugfs handling

The debugfs directory should be created when a device
is probed, not when it is registered. It should be removed
when the device is removed, not when it is unregistered.

Fixes: d06905d68610 ("i2c: add core-managed per-client directory in debugfs")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/i2c/i2c-core-base.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index c24ccefb015ee..d4bbf6827d1a0 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -583,6 +583,9 @@ static int i2c_device_probe(struct device *dev)
 		goto err_detach_pm_domain;
 	}
 
+	client->debugfs = debugfs_create_dir(dev_name(&client->dev),
+					     client->adapter->debugfs);
+
 	if (driver->probe)
 		status = driver->probe(client);
 	else
@@ -602,6 +605,7 @@ static int i2c_device_probe(struct device *dev)
 	return 0;
 
 err_release_driver_resources:
+	debugfs_remove_recursive(client->debugfs);
 	devres_release_group(&client->dev, client->devres_group_id);
 err_detach_pm_domain:
 	dev_pm_domain_detach(&client->dev, do_power_on);
@@ -627,6 +631,8 @@ static void i2c_device_remove(struct device *dev)
 		driver->remove(client);
 	}
 
+	debugfs_remove_recursive(client->debugfs);
+
 	devres_release_group(&client->dev, client->devres_group_id);
 
 	dev_pm_domain_detach(&client->dev, true);
@@ -1015,8 +1021,6 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf
 	if (status)
 		goto out_remove_swnode;
 
-	client->debugfs = debugfs_create_dir(dev_name(&client->dev), adap->debugfs);
-
 	dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
 		client->name, dev_name(&client->dev));
 
@@ -1061,7 +1065,6 @@ void i2c_unregister_device(struct i2c_client *client)
 	if (ACPI_COMPANION(&client->dev))
 		acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev));
 
-	debugfs_remove_recursive(client->debugfs);
 	device_remove_software_node(&client->dev);
 	device_unregister(&client->dev);
 }

From 6494bd2d05f927fc0395c2ea11461517a9e0bb80 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:53 +0900
Subject: [PATCH 199/368] genksyms: fix syntax error for attribute after
 'union'

A longstanding issue with genksyms is that it has hidden syntax errors.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

For example, with CONFIG_MODVERSIONS=y on v6.13-rc1:

    $ make -s KCFLAGS=-D__GENKSYMS__ fs/lockd/svc.i
    $ cat fs/lockd/svc.i | scripts/genksyms/genksyms -w
        [ snip ]
    ./include/net/addrconf.h:35: syntax error

The syntax error occurs in the following code in include/net/addrconf.h:

    union __packed {
            [ snip ]
    };

The issue arises from __packed, which is defined as
__attribute__((__packed__)), immediately after the 'union' keyword.

This commit allows the 'union' keyword to be followed by attributes.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 33639232a709e..a2cd035a78c95 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -236,16 +236,16 @@ type_specifier:
 	   so that it is easier to expand the definition fully later.  */
 	| STRUCT_KEYW attribute_opt IDENT
 		{ remove_node($1); (*$3)->tag = SYM_STRUCT; $$ = $3; }
-	| UNION_KEYW IDENT
-		{ remove_node($1); (*$2)->tag = SYM_UNION; $$ = $2; }
+	| UNION_KEYW attribute_opt IDENT
+		{ remove_node($1); (*$3)->tag = SYM_UNION; $$ = $3; }
 	| ENUM_KEYW IDENT
 		{ remove_node($1); (*$2)->tag = SYM_ENUM; $$ = $2; }
 
 	/* Full definitions of an s/u/e.  Record it.  */
 	| STRUCT_KEYW attribute_opt IDENT class_body
 		{ record_compound($1, $3, $4, SYM_STRUCT); $$ = $4; }
-	| UNION_KEYW IDENT class_body
-		{ record_compound($1, $2, $3, SYM_UNION); $$ = $3; }
+	| UNION_KEYW attribute_opt IDENT class_body
+		{ record_compound($1, $3, $4, SYM_UNION); $$ = $4; }
 	| ENUM_KEYW IDENT enum_body
 		{ record_compound($1, $2, $3, SYM_ENUM); $$ = $3; }
 	/*
@@ -255,7 +255,7 @@ type_specifier:
 		{ add_symbol(NULL, SYM_ENUM, NULL, 0); $$ = $2; }
 	/* Anonymous s/u definitions.  Nothing needs doing.  */
 	| STRUCT_KEYW attribute_opt class_body		{ $$ = $3; }
-	| UNION_KEYW class_body				{ $$ = $2; }
+	| UNION_KEYW attribute_opt class_body		{ $$ = $3; }
 	;
 
 simple_type_specifier:

From c825840527813582385edca3ddeee46886527258 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:54 +0900
Subject: [PATCH 200/368] genksyms: fix syntax error for builtin (u)int*x*_t
 types

A longstanding issue with genksyms is that it has hidden syntax errors.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

For example, genksyms fails to parse the following code in
arch/arm64/lib/xor-neon.c:

    static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
    {
            [ snip ]
    }

The syntax error occurs because genksyms does not recognize the
uint64x2_t keyword.

This commit adds support for builtin types described in Arm Neon
Intrinsics Reference.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/lex.l | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
index a1f969dcf24f1..22aeb57649d9c 100644
--- a/scripts/genksyms/lex.l
+++ b/scripts/genksyms/lex.l
@@ -51,6 +51,7 @@ MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
 
 %%
 
+u?int(8|16|32|64)x(1|2|4|8|16)_t	return BUILTIN_INT_KEYW;
 
  /* Keep track of our location in the original source files.  */
 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;

From a23d4c2f5b80a8dc5f1e40658abbe5983af1a0e9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 14 Jan 2025 00:00:55 +0900
Subject: [PATCH 201/368] genksyms: fix syntax error for attribute before
 init-declarator

A longstanding issue with genksyms is that it has hidden syntax errors.

For example, genksyms fails to parse the following valid code:

    int x, __attribute__((__section__(".init.data")))y;

Here, only 'y' is annotated by the attribute, although I am not aware
of actual uses of this pattern in the kernel tree.

When a syntax error occurs, yyerror() is called. However,
error_with_pos() is a no-op unless the -w option is provided.

You can observe syntax errors by manually passing the -w option.

    $ echo 'int x, __attribute__((__section__(".init.data")))y;' | scripts/genksyms/genksyms -w
    <stdin>:1: syntax error

This commit allows attributes to be placed between a comma and
init_declarator.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Nicolas Schier <n.schier@avm.de>
---
 scripts/genksyms/parse.y | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index a2cd035a78c95..ee600a804fa10 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -173,9 +173,9 @@ init_declarator_list:
 		  $$ = $1;
 		  dont_want_type_specifier = true;
 		}
-	| init_declarator_list ',' init_declarator
-		{ struct string_list *decl = *$3;
-		  *$3 = NULL;
+	| init_declarator_list ',' attribute_opt init_declarator
+		{ struct string_list *decl = *$4;
+		  *$4 = NULL;
 		  free_list(*$2, NULL);
 		  *$2 = decl_spec;
 
@@ -186,7 +186,7 @@ init_declarator_list:
 		  add_symbol(current_name,
 			     is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern);
 		  current_name = NULL;
-		  $$ = $3;
+		  $$ = $4;
 		  dont_want_type_specifier = true;
 		}
 	;

From f764fab72d98833b47d389ac2ed35bd000132d87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 14 Oct 2024 15:18:04 +0200
Subject: [PATCH 202/368] cifs: Change translation of
 STATUS_NOT_A_REPARSE_POINT to -ENODATA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

STATUS_NOT_A_REPARSE_POINT indicates that the object does not have a reparse
point buffer attached; it is returned, for example, by FSCTL_GET_REPARSE_POINT.

Currently STATUS_NOT_A_REPARSE_POINT is translated to -EIO. Change it to
-ENODATA, which better describes the situation when no reparse point is set.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/netmisc.c      | 7 +++++++
 fs/smb/client/nterr.c        | 1 +
 fs/smb/client/nterr.h        | 1 +
 fs/smb/client/smb2maperror.c | 2 +-
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/smb/client/netmisc.c b/fs/smb/client/netmisc.c
index 17b3e21ea8689..9dc16211e7a13 100644
--- a/fs/smb/client/netmisc.c
+++ b/fs/smb/client/netmisc.c
@@ -871,6 +871,13 @@ map_smb_to_linux_error(char *buf, bool logErr)
 	}
 	/* else ERRHRD class errors or junk  - return EIO */
 
+	/* special cases for NT status codes which cannot be translated to DOS codes */
+	if (smb->Flags2 & SMBFLG2_ERR_STATUS) {
+		__u32 err = le32_to_cpu(smb->Status.CifsError);
+		if (err == (NT_STATUS_NOT_A_REPARSE_POINT))
+			rc = -ENODATA;
+	}
+
 	cifs_dbg(FYI, "Mapping smb error code 0x%x to POSIX err %d\n",
 		 le32_to_cpu(smb->Status.CifsError), rc);
 
diff --git a/fs/smb/client/nterr.c b/fs/smb/client/nterr.c
index d396a8e98a81c..8f0bc441295ef 100644
--- a/fs/smb/client/nterr.c
+++ b/fs/smb/client/nterr.c
@@ -674,6 +674,7 @@ const struct nt_err_code_struct nt_errs[] = {
 	{"NT_STATUS_QUOTA_LIST_INCONSISTENT",
 	 NT_STATUS_QUOTA_LIST_INCONSISTENT},
 	{"NT_STATUS_FILE_IS_OFFLINE", NT_STATUS_FILE_IS_OFFLINE},
+	{"NT_STATUS_NOT_A_REPARSE_POINT", NT_STATUS_NOT_A_REPARSE_POINT},
 	{"NT_STATUS_NO_MORE_ENTRIES", NT_STATUS_NO_MORE_ENTRIES},
 	{"NT_STATUS_MORE_ENTRIES", NT_STATUS_MORE_ENTRIES},
 	{"NT_STATUS_SOME_UNMAPPED", NT_STATUS_SOME_UNMAPPED},
diff --git a/fs/smb/client/nterr.h b/fs/smb/client/nterr.h
index edd4741cab0a1..180602c22355e 100644
--- a/fs/smb/client/nterr.h
+++ b/fs/smb/client/nterr.h
@@ -546,6 +546,7 @@ extern const struct nt_err_code_struct nt_errs[];
 #define NT_STATUS_TOO_MANY_LINKS 0xC0000000 | 0x0265
 #define NT_STATUS_QUOTA_LIST_INCONSISTENT 0xC0000000 | 0x0266
 #define NT_STATUS_FILE_IS_OFFLINE 0xC0000000 | 0x0267
+#define NT_STATUS_NOT_A_REPARSE_POINT 0xC0000000 | 0x0275
 #define NT_STATUS_NO_SUCH_JOB 0xC0000000 | 0xEDE	/* scheduler */
 
 #endif				/* _NTERR_H */
diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c
index b05313acf9b2b..612e7b5181b6c 100644
--- a/fs/smb/client/smb2maperror.c
+++ b/fs/smb/client/smb2maperror.c
@@ -871,7 +871,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	{STATUS_VALIDATE_CONTINUE, -EIO, "STATUS_VALIDATE_CONTINUE"},
 	{STATUS_NO_MATCH, -EIO, "STATUS_NO_MATCH"},
 	{STATUS_NO_MORE_MATCHES, -EIO, "STATUS_NO_MORE_MATCHES"},
-	{STATUS_NOT_A_REPARSE_POINT, -EIO, "STATUS_NOT_A_REPARSE_POINT"},
+	{STATUS_NOT_A_REPARSE_POINT, -ENODATA, "STATUS_NOT_A_REPARSE_POINT"},
 	{STATUS_IO_REPARSE_TAG_INVALID, -EIO, "STATUS_IO_REPARSE_TAG_INVALID"},
 	{STATUS_IO_REPARSE_TAG_MISMATCH, -EIO,
 	"STATUS_IO_REPARSE_TAG_MISMATCH"},

From 1f566840a82982141f94086061927a90e79440e5 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Fri, 24 Jan 2025 20:54:41 -0500
Subject: [PATCH 203/368] clocksource: Use pr_info() for "Checking clocksource
 synchronization" message

The "Checking clocksource synchronization" message is normally printed
when clocksource_verify_percpu() is called for a given clocksource if
both the CLOCK_SOURCE_UNSTABLE and CLOCK_SOURCE_VERIFY_PERCPU flags
are set.

It is an informational message and so pr_info() is the correct choice.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20250125015442.3740588-1-longman@redhat.com
---
 kernel/time/clocksource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7304d7cf47f2d..77d9566d3aa68 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -382,7 +382,8 @@ void clocksource_verify_percpu(struct clocksource *cs)
 		return;
 	}
 	testcpu = smp_processor_id();
-	pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+	pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
+		cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
 	for_each_cpu(cpu, &cpus_chosen) {
 		if (cpu == testcpu)
 			continue;

From 825c78e6a60c309a59d18d5ac5968aa79cef0bd6 Mon Sep 17 00:00:00 2001
From: Xu Lu <luxu.kernel@bytedance.com>
Date: Mon, 27 Jan 2025 17:38:46 +0800
Subject: [PATCH 204/368] irqchip/riscv: Ensure ordering of memory writes and
 IPI writes

RISC-V distinguishes between memory accesses and device I/O and uses FENCE
instruction to order them as viewed by other RISC-V harts and external
devices or coprocessors. The FENCE instruction can order any combination of
device input(I), device output(O), memory reads(R) and memory
writes(W). For example, 'fence w, o' is used to ensure all memory writes
from instructions preceding the FENCE instruction appear earlier in the
global memory order than device output writes from instructions after the
FENCE instruction.

RISC-V issues IPIs by writing to the IMSIC/ACLINT MMIO registers, which is
regarded as a device output operation. However, the existing implementation
of the IMSIC/ACLINT drivers issues the IPI via writel_relaxed(), which does
not guarantee the order of device output operation and preceding memory
writes. As a consequence the hart receiving the IPI might not observe the
IPI related data.

Fix this by replacing writel_relaxed() with writel() when issuing IPIs,
which uses 'fence w, o' to ensure all previous writes made by the current
hart are visible to other harts before they receive the IPI.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250127093846.98625-1-luxu.kernel@bytedance.com
---
 drivers/irqchip/irq-riscv-imsic-early.c      | 2 +-
 drivers/irqchip/irq-thead-c900-aclint-sswi.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c
index c5c2e6929a2f5..275df50057057 100644
--- a/drivers/irqchip/irq-riscv-imsic-early.c
+++ b/drivers/irqchip/irq-riscv-imsic-early.c
@@ -27,7 +27,7 @@ static void imsic_ipi_send(unsigned int cpu)
 {
 	struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu);
 
-	writel_relaxed(IMSIC_IPI_ID, local->msi_va);
+	writel(IMSIC_IPI_ID, local->msi_va);
 }
 
 static void imsic_ipi_starting_cpu(void)
diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c
index b0e366ade4271..8ff6e7a1363bd 100644
--- a/drivers/irqchip/irq-thead-c900-aclint-sswi.c
+++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c
@@ -31,7 +31,7 @@ static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs);
 
 static void thead_aclint_sswi_ipi_send(unsigned int cpu)
 {
-	writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu));
+	writel(0x1, per_cpu(sswi_cpu_regs, cpu));
 }
 
 static void thead_aclint_sswi_ipi_clear(void)

From 987f379b54091cc1b1db986bde71cee1081350b3 Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <eichest@gmail.com>
Date: Fri, 24 Jan 2025 09:50:39 +0100
Subject: [PATCH 205/368] irqchip/irq-mvebu-icu: Fix access to msi_data from
 irq_domain::host_data

mvebu_icu_translate() incorrectly casts irq_domain::host_data directly to
mvebu_icu_msi_data. However, host_data actually points to a structure of
type msi_domain_info.

This incorrect cast causes issues such as the thermal sensors of the
CP110 platform malfunctioning. Specifically, the translation of the SEI
interrupt to IRQ_TYPE_EDGE_RISING fails, preventing proper interrupt
handling. The following error was observed:

  genirq: Setting trigger mode 4 for irq 85 failed (irq_chip_set_type_parent+0x0/0x34)
  armada_thermal f2400000.system-controller:thermal-sensor@70: Cannot request threaded IRQ 85

Resolve the issue by first casting host_data to msi_domain_info and then
accessing mvebu_icu_msi_data through msi_domain_info::chip_data.

Fixes: d929e4db22b6 ("irqchip/irq-mvebu-icu: Prepare for real per device MSI")
Signed-off-by: Stefan Eichenberger <eichest@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250124085140.44792-1-eichest@gmail.com
---
 drivers/irqchip/irq-mvebu-icu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index b337f6c05f184..4eebed39880a5 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -68,7 +68,8 @@ static int mvebu_icu_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
 			       unsigned long *hwirq, unsigned int *type)
 {
 	unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2;
-	struct mvebu_icu_msi_data *msi_data = d->host_data;
+	struct msi_domain_info *info = d->host_data;
+	struct mvebu_icu_msi_data *msi_data = info->chip_data;
 	struct mvebu_icu *icu = msi_data->icu;
 
 	/* Check the count of the parameters in dt */

From fb95897b8c60653805aa09daec575ca30983f768 Mon Sep 17 00:00:00 2001
From: Wentao Liang <vulab@iscas.ac.cn>
Date: Fri, 24 Jan 2025 11:22:28 +0800
Subject: [PATCH 206/368] xfs: Propagate errors from
 xfs_reflink_cancel_cow_range in xfs_dax_write_iomap_end

In xfs_dax_write_iomap_end(), directly return the result of
xfs_reflink_cancel_cow_range() when !written, ensuring proper
error propagation and improving code robustness.

Fixes: ea6c49b784f0 ("xfs: support CoW in fsdax mode")
Cc: stable@vger.kernel.org # v6.0
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_iomap.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 50fa3ef89f6c9..d61460309a783 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -976,10 +976,8 @@ xfs_dax_write_iomap_end(
 	if (!xfs_is_cow_inode(ip))
 		return 0;
 
-	if (!written) {
-		xfs_reflink_cancel_cow_range(ip, pos, length, true);
-		return 0;
-	}
+	if (!written)
+		return xfs_reflink_cancel_cow_range(ip, pos, length, true);
 
 	return xfs_reflink_end_cow(ip, pos, written);
 }

From 28aecef5b1015bf6023ddc12b1a67f6678271fcb Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Sun, 19 Jan 2025 22:02:38 +0530
Subject: [PATCH 207/368] selftests: livepatch: handle PRINTK_CALLER in
 check_result()

Some arch configs (like ppc64) enable CONFIG_PRINTK_CALLER,
which adds the caller id as part of the dmesg. With recent
util-linux's update 467a5b3192f16 ('dmesg: add caller_id support')
the standard "dmesg" has been enhanced to print PRINTK_CALLER fields.

Due to this, even though the expected and observed messages are the same,
the test case is reported as failed.

 -% insmod test_modules/test_klp_livepatch.ko
 -livepatch: enabling patch 'test_klp_livepatch'
 -livepatch: 'test_klp_livepatch': initializing patching transition
 -livepatch: 'test_klp_livepatch': starting patching transition
 -livepatch: 'test_klp_livepatch': completing patching transition
 -livepatch: 'test_klp_livepatch': patching complete
 -% echo 0 > /sys/kernel/livepatch/test_klp_livepatch/enabled
 -livepatch: 'test_klp_livepatch': initializing unpatching transition
 -livepatch: 'test_klp_livepatch': starting unpatching transition
 -livepatch: 'test_klp_livepatch': completing unpatching transition
 -livepatch: 'test_klp_livepatch': unpatching complete
 -% rmmod test_klp_livepatch
 +[   T3659] % insmod test_modules/test_klp_livepatch.ko
 +[   T3682] livepatch: enabling patch 'test_klp_livepatch'
 +[   T3682] livepatch: 'test_klp_livepatch': initializing patching transition
 +[   T3682] livepatch: 'test_klp_livepatch': starting patching transition
 +[    T826] livepatch: 'test_klp_livepatch': completing patching transition
 +[    T826] livepatch: 'test_klp_livepatch': patching complete
 +[   T3659] % echo 0 > /sys/kernel/livepatch/test_klp_livepatch/enabled
 +[   T3659] livepatch: 'test_klp_livepatch': initializing unpatching transition
 +[   T3659] livepatch: 'test_klp_livepatch': starting unpatching transition
 +[    T789] livepatch: 'test_klp_livepatch': completing unpatching transition
 +[    T789] livepatch: 'test_klp_livepatch': unpatching complete
 +[   T3659] % rmmod test_klp_livepatch

  ERROR: livepatch kselftest(s) failed
 not ok 1 selftests: livepatch: test-livepatch.sh # exit=1

Currently the check_result() handles the "[time]" removal from
the dmesg. Enhance the check to also handle removal of "[Thread Id]"
or "[CPU Id]".

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Tested-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20250119163238.749847-1-maddy@linux.ibm.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 tools/testing/selftests/livepatch/functions.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index e5d06fb402335..15601402dee65 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -306,7 +306,8 @@ function check_result {
 	result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
 		 grep -e 'livepatch:' -e 'test_klp' | \
 		 grep -v '\(tainting\|taints\) kernel' | \
-		 sed 's/^\[[ 0-9.]*\] //')
+		 sed 's/^\[[ 0-9.]*\] //' | \
+		 sed 's/^\[[ ]*[CT][0-9]*\] //')
 
 	if [[ "$expect" == "$result" ]] ; then
 		echo "ok"

From 26b63bee2f6e711c5a169997fd126fddcfb90848 Mon Sep 17 00:00:00 2001
From: Wentao Liang <vulab@iscas.ac.cn>
Date: Fri, 24 Jan 2025 11:45:09 +0800
Subject: [PATCH 208/368] xfs: Add error handling for
 xfs_reflink_cancel_cow_range

In xfs_inactive(), xfs_reflink_cancel_cow_range() is called
without error handling, risking unnoticed failures and
inconsistent behavior compared to other parts of the code.

Fix this issue by adding error handling for
xfs_reflink_cancel_cow_range(), improving code robustness.

Fixes: 6231848c3aa5 ("xfs: check for cow blocks before trying to clear them")
Cc: stable@vger.kernel.org # v4.17
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_inode.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c95fe1b1de4e6..b1f9f156ec888 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1404,8 +1404,11 @@ xfs_inactive(
 		goto out;
 
 	/* Try to clean out the cow blocks if there are any. */
-	if (xfs_inode_has_cow_data(ip))
-		xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+	if (xfs_inode_has_cow_data(ip)) {
+		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+		if (error)
+			goto out;
+	}
 
 	if (VFS_I(ip)->i_nlink != 0) {
 		/*

From eb5c79828cfa72e8dbdf2db842a781ad6806cdaf Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sat, 25 Jan 2025 20:32:15 +0100
Subject: [PATCH 209/368] firmware: cs_dsp: FW_CS_DSP_KUNIT_TEST should not
 select REGMAP

Enabling a (modular) test should not silently enable additional kernel
functionality, as that may increase the attack surface of a product.

Fix this by making FW_CS_DSP_KUNIT_TEST (and FW_CS_DSP_KUNIT_TEST_UTILS)
depend on REGMAP instead of selecting it.

After this, one can safely enable CONFIG_KUNIT_ALL_TESTS=m to build
modules for all appropriate tests for one's system, without pulling in
extra unwanted functionality, while still allowing a tester to manually
enable REGMAP_BUILD and this test suite on a system where REGMAP is not
enabled by default.

Fixes: dd0b6b1f29b92202 ("firmware: cs_dsp: Add KUnit testing of bin file download")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://patch.msgid.link/73c81ac85e21f1c5a75b7628d90cbb0e1b4ed0fa.1737833376.git.geert@linux-m68k.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/firmware/cirrus/Kconfig | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig
index ee09269c63b51..0a883091259a2 100644
--- a/drivers/firmware/cirrus/Kconfig
+++ b/drivers/firmware/cirrus/Kconfig
@@ -6,15 +6,13 @@ config FW_CS_DSP
 
 config FW_CS_DSP_KUNIT_TEST_UTILS
 	tristate
-	depends on KUNIT
-	select REGMAP
+	depends on KUNIT && REGMAP
 	select FW_CS_DSP
 
 config FW_CS_DSP_KUNIT_TEST
 	tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS
-	depends on KUNIT
+	depends on KUNIT && REGMAP
 	default KUNIT_ALL_TESTS
-	select REGMAP
 	select FW_CS_DSP
 	select FW_CS_DSP_KUNIT_TEST_UTILS
 	help

From 3ff53862c322aa7bb115d84348d5a641dc905d87 Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@nxp.com>
Date: Mon, 27 Jan 2025 10:34:22 +0200
Subject: [PATCH 210/368] ASoC: amd: acp: Fix possible deadlock

On the error path, acp_i2s_set_tdm_slot() returns without releasing
the lock, which could result in a deadlock.

Error reported by sparse:
sound/soc/amd/acp/acp-i2s.c:95:12: error: context imbalance in
'acp_i2s_set_tdm_slot' - different lock contexts for basic block

Fixes: cd60dec8994c ("ASoC: amd: acp: Refactor TDM slots selction based on acp revision id")
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.dev>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://patch.msgid.link/20250127083422.20406-1-daniel.baluta@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/acp/acp-i2s.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c
index 1f59ee248771c..89e99ed4275a2 100644
--- a/sound/soc/amd/acp/acp-i2s.c
+++ b/sound/soc/amd/acp/acp-i2s.c
@@ -181,6 +181,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas
 			break;
 		default:
 			dev_err(dev, "Unknown chip revision %d\n", chip->acp_rev);
+			spin_unlock_irq(&adata->acp_lock);
 			return -EINVAL;
 		}
 	}

From cc77e2ce187d26cc66af3577bf896d7410eb25ab Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel@debian.org>
Date: Sat, 18 Jan 2025 06:36:43 +0100
Subject: [PATCH 211/368] ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD
 870 QVO drives

Disable link power management on Samsung SSD 870 QVO drives
to make them work again after the default LPM policy was
switched to low.

Testing so far has shown that regular Samsung SSD 870
(the non QVO variants) do not need it and work fine with
the default LPM policy.

Cc: stable@vger.kernel.org
Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type")
Signed-off-by: Daniel Baumann <daniel@debian.org>
Link: https://lore.kernel.org/linux-ide/ac64a484-022c-42a0-95bc-1520333b1536@debian.org/
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/libata-core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c085dd81ebe7f..63ec2f2184319 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4143,6 +4143,10 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = {
 	{ "Samsung SSD 860*",		NULL,	ATA_QUIRK_NO_NCQ_TRIM |
 						ATA_QUIRK_ZERO_AFTER_TRIM |
 						ATA_QUIRK_NO_NCQ_ON_ATI },
+	{ "Samsung SSD 870 QVO*",	NULL,	ATA_QUIRK_NO_NCQ_TRIM |
+						ATA_QUIRK_ZERO_AFTER_TRIM |
+						ATA_QUIRK_NO_NCQ_ON_ATI |
+						ATA_QUIRK_NOLPM },
 	{ "Samsung SSD 870*",		NULL,	ATA_QUIRK_NO_NCQ_TRIM |
 						ATA_QUIRK_ZERO_AFTER_TRIM |
 						ATA_QUIRK_NO_NCQ_ON_ATI },

From 2f0805d7c08bea71c95561bfb3e45d93b05196b9 Mon Sep 17 00:00:00 2001
From: Liang Jie <liangjie@lixiang.com>
Date: Fri, 10 Jan 2025 18:05:24 +0800
Subject: [PATCH 212/368] ceph: streamline request head structures in MDS
 client

The existence of the ceph_mds_request_head_old structure in the MDS
client code is no longer required due to improvements in handling
different MDS request header versions. This patch removes the now
redundant ceph_mds_request_head_old structure and replaces its usage
with the flexible and extensible ceph_mds_request_head structure.

Changes include:
- Modification of find_legacy_request_head to directly cast the
  pointer to ceph_mds_request_head_legacy without going through the
  old structure.
- Update sizeof calculations in create_request_message to use
  offsetofend for consistency and future-proofing, rather than
  referencing the old structure.
- Use of the structured ceph_mds_request_head directly instead of the
  old one.

Additionally, this consolidation normalizes the handling of
request_head_version v1 to align with versions v2 and v3, leading to
a more consistent and maintainable codebase.

These changes simplify the codebase and reduce potential confusion
stemming from the existence of an obsolete structure.

Signed-off-by: Liang Jie <liangjie@lixiang.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c         | 16 ++++++++--------
 include/linux/ceph/ceph_fs.h | 14 --------------
 2 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ae37f0e24c996..921f08a27dd7a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2945,12 +2945,12 @@ static struct ceph_mds_request_head_legacy *
 find_legacy_request_head(void *p, u64 features)
 {
 	bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
-	struct ceph_mds_request_head_old *ohead;
+	struct ceph_mds_request_head *head;
 
 	if (legacy)
 		return (struct ceph_mds_request_head_legacy *)p;
-	ohead = (struct ceph_mds_request_head_old *)p;
-	return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid;
+	head = (struct ceph_mds_request_head *)p;
+	return (struct ceph_mds_request_head_legacy *)&head->oldest_client_tid;
 }
 
 /*
@@ -3020,7 +3020,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 	if (legacy)
 		len = sizeof(struct ceph_mds_request_head_legacy);
 	else if (request_head_version == 1)
-		len = sizeof(struct ceph_mds_request_head_old);
+		len = offsetofend(struct ceph_mds_request_head, args);
 	else if (request_head_version == 2)
 		len = offsetofend(struct ceph_mds_request_head, ext_num_fwd);
 	else
@@ -3104,11 +3104,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 		msg->hdr.version = cpu_to_le16(3);
 		p = msg->front.iov_base + sizeof(*lhead);
 	} else if (request_head_version == 1) {
-		struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
+		struct ceph_mds_request_head *nhead = msg->front.iov_base;
 
 		msg->hdr.version = cpu_to_le16(4);
-		ohead->version = cpu_to_le16(1);
-		p = msg->front.iov_base + sizeof(*ohead);
+		nhead->version = cpu_to_le16(1);
+		p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, args);
 	} else if (request_head_version == 2) {
 		struct ceph_mds_request_head *nhead = msg->front.iov_base;
 
@@ -3265,7 +3265,7 @@ static int __prepare_send_request(struct ceph_mds_session *session,
 	 * so we limit to retry at most 256 times.
 	 */
 	if (req->r_attempts) {
-	       old_max_retry = sizeof_field(struct ceph_mds_request_head_old,
+	       old_max_retry = sizeof_field(struct ceph_mds_request_head,
 					    num_retry);
 	       old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE);
 	       if ((old_version && req->r_attempts >= old_max_retry) ||
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 2d7d86f0290d9..c7f2c63b3bc3f 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -504,20 +504,6 @@ struct ceph_mds_request_head_legacy {
 
 #define CEPH_MDS_REQUEST_HEAD_VERSION  3
 
-struct ceph_mds_request_head_old {
-	__le16 version;                /* struct version */
-	__le64 oldest_client_tid;
-	__le32 mdsmap_epoch;           /* on client */
-	__le32 flags;                  /* CEPH_MDS_FLAG_* */
-	__u8 num_retry, num_fwd;       /* count retry, fwd attempts */
-	__le16 num_releases;           /* # include cap/lease release records */
-	__le32 op;                     /* mds op code */
-	__le32 caller_uid, caller_gid;
-	__le64 ino;                    /* use this ino for openc, mkdir, mknod,
-					  etc. (if replaying) */
-	union ceph_mds_request_args_ext args;
-} __attribute__ ((packed));
-
 struct ceph_mds_request_head {
 	__le16 version;                /* struct version */
 	__le64 oldest_client_tid;

From 3981be13ec1baf811bfb93ed6a98bafc85cdeab1 Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Date: Fri, 24 Jan 2025 11:46:23 -0800
Subject: [PATCH 213/368] ceph: exchange hardcoded value on NAME_MAX

Initially, ceph_fs_debugfs_init() had a temporary
name buffer with a hardcoded length of 80 characters.
Later, it was hardcoded again to 100 characters.
It makes sense to replace the hardcoded value with
a properly defined constant; NAME_MAX (255 characters)
should be enough for any name.

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Patrick Donnelly <pdonnell@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fdf9dc15eafae..fdd404fc81124 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -412,7 +412,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-	char name[100];
+	char name[NAME_MAX];
 
 	doutc(fsc->client, "begin\n");
 	fsc->debugfs_congestion_kb =

From 5aa21b0495df1fac6d39f45011c1572bb431c44c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 27 Jan 2025 15:30:44 +0100
Subject: [PATCH 214/368] loop: don't clear LO_FLAGS_PARTSCAN on
 LOOP_SET_STATUS{,64}

LOOP_SET_STATUS{,64} can set a lot more flags than it is supposed to
clear (the LOOP_SET_STATUS_CLEARABLE_FLAGS vs
LOOP_SET_STATUS_SETTABLE_FLAGS defines should have been a hint..).

Fix this by only clearing the bits in LOOP_SET_STATUS_CLEARABLE_FLAGS.

Fixes: ae074d07a0e5 ("loop: move updating lo_flags out of loop_set_status_from_info")
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250127143045.538279-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1ec7417c7f005..d1f1d6bef2e69 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1281,8 +1281,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	partscan = !(lo->lo_flags & LO_FLAGS_PARTSCAN) &&
 		(info->lo_flags & LO_FLAGS_PARTSCAN);
 
-	lo->lo_flags &= ~(LOOP_SET_STATUS_SETTABLE_FLAGS |
-			  LOOP_SET_STATUS_CLEARABLE_FLAGS);
+	lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
 	lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS);
 
 	if (size_changed) {

From 6c1bb4031729871fa203983bd77bed1ee3c61347 Mon Sep 17 00:00:00 2001
From: Ondrej Jirman <megi@xff.cz>
Date: Mon, 27 Jan 2025 16:04:55 +0100
Subject: [PATCH 215/368] ASoC: codec: es8316: "DAC Soft Ramp Rate" is just a 2
 bit control

Max value should be 3, otherwise "DAC Soft Ramp Switch" will be
overwritten by this control.

Signed-off-by: Ondrej Jirman <megi@xff.cz>
Link: https://patch.msgid.link/20250127150458.1489425-1-megi@xff.cz
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/es8316.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c
index 61729e5b50a8e..71aad3e4c83f8 100644
--- a/sound/soc/codecs/es8316.c
+++ b/sound/soc/codecs/es8316.c
@@ -99,7 +99,7 @@ static const struct snd_kcontrol_new es8316_snd_controls[] = {
 	SOC_DOUBLE_R_TLV("DAC Playback Volume", ES8316_DAC_VOLL,
 			 ES8316_DAC_VOLR, 0, 0xc0, 1, dac_vol_tlv),
 	SOC_SINGLE("DAC Soft Ramp Switch", ES8316_DAC_SET1, 4, 1, 1),
-	SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 4, 0),
+	SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 3, 0),
 	SOC_SINGLE("DAC Notch Filter Switch", ES8316_DAC_SET2, 6, 1, 0),
 	SOC_SINGLE("DAC Double Fs Switch", ES8316_DAC_SET2, 7, 1, 0),
 	SOC_SINGLE("DAC Stereo Enhancement", ES8316_DAC_SET3, 0, 7, 0),

From debe797c1e972ebe434c90f3fa7f54d9cf7ab251 Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Thu, 5 Dec 2024 19:26:14 +0100
Subject: [PATCH 216/368] tools/power turbostat: Add fixed RAPL PSYS divisor
 for SPR

Intel Sapphire Rapids is an exception and has fixed divisor for RAPL PSYS
counter set to 1.0. Add a platform bit and enable it for SPR.

Reported-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index a2ca1c6c3638a..1bcecfed721b2 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -358,7 +358,7 @@ unsigned long long cpuidle_cur_sys_lpi_us;
 unsigned int tj_max;
 unsigned int tj_max_override;
 double rapl_power_units, rapl_time_units;
-double rapl_dram_energy_units, rapl_energy_units;
+double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
 double rapl_joule_counter_range;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
@@ -424,6 +424,7 @@ struct platform_features {
 	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
 	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
 	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
+	bool has_fixed_rapl_psys_unit;	/* Fixed Energy Unit used for PSYS RAPL Domain */
 	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
 	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
 	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
@@ -824,6 +825,7 @@ static const struct platform_features spr_features = {
 	.has_msr_core_c1_res = 1,
 	.has_irtl_msrs = 1,
 	.has_cst_prewake_bit = 1,
+	.has_fixed_rapl_psys_unit = 1,
 	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
 	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
 };
@@ -1292,7 +1294,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
 	 .msr = MSR_PLATFORM_ENERGY_STATUS,
 	 .msr_mask = 0x00000000FFFFFFFF,
 	 .msr_shift = 0,
-	 .platform_rapl_msr_scale = &rapl_energy_units,
+	 .platform_rapl_msr_scale = &rapl_psys_energy_units,
 	 .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
 	 .bic = BIC_SysWatt | BIC_Sys_J,
 	 .compat_scale = 1.0,
@@ -7112,6 +7114,11 @@ void rapl_probe_intel(void)
 	else
 		rapl_dram_energy_units = rapl_energy_units;
 
+	if (platform->has_fixed_rapl_psys_unit)
+		rapl_psys_energy_units = 1.0;
+	else
+		rapl_psys_energy_units = rapl_energy_units;
+
 	time_unit = msr >> 16 & 0xF;
 	if (time_unit == 0)
 		time_unit = 0xA;

From 1af5baeda512d0940748fdf9b559e1041dbab0cf Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 8 Jan 2025 14:19:42 +0800
Subject: [PATCH 217/368] tools/power turbostat: Enhance turbostat
 self-performance visibility

Include procfs and sysfs data collection time in the system summary
row of the "usec" column.  This is useful for isolating where the
time goes during turbostat data collection.

Background:

Column "usec" shows
1. the number of microseconds elapsed during counter collection,
   including thread migration -- if any, for each CPU row.
2. total elapsed time to collect the counters on all cpus, for the
   summary row.
This can be used to check the time cost of a given column. For example,
run the commands below separately
   turbostat --show usec sleep 1
   turbostat --show usec,CoreTmp sleep 1
and the delta in the usec column will tell the time cost for CoreTmp
(Thermal MSR read)

Problem:

Some of the kernel procfs/sysfs accesses are expensive, especially on
high core count systems. "usec" column cannot tell this because it only
includes the time cost of the counters.

Solution:

Leave the per CPU "usec" as it is and modify the summary "usec" to
include the time cost of the procfs/sysfs snapshot.

With it, the "usec" column can be used to get
1. the baseline, e.g.
	turbostat --show usec sleep 1
2. the baseline + some per CPU counter cost, e.g.
	turbostat --show usec,CoreTmp sleep 1
3. the baseline + some per CPU sysfs cost, e.g.
	turbostat --show usec,C1 sleep 1
4. the baseline + /proc/interrupts cost, e.g
	turbostat --show usec,IRQ sleep 1

Man-page update is also included.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 2 +-
 tools/power/x86/turbostat/turbostat.c | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index f043a93defd4a..99bf905ade812 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -136,7 +136,7 @@ displays the statistics gathered since it was forked.
 The system configuration dump (if --quiet is not used) is followed by statistics.  The first row of the statistics labels the content of each column (below).  The second row of statistics is the system summary line.  The system summary line has a '-' in the columns for the Package, Core, and CPU.  The contents of the system summary line depends on the type of column.  Columns that count items (eg. IRQ) show the sum across all CPUs in the system.  Columns that show a percentage show the average across all CPUs in the system.  Columns that dump raw MSR values simply show 0 in the summary.  After the system summary row, each row describes a specific Package/Core/CPU.  Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
 .SH COLUMN DESCRIPTIONS
 .PP
-\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any.  This counter is disabled by default, and is enabled with "--enable usec", or --debug.  On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any.  This counter is disabled by default, and is enabled with "--enable usec", or --debug.  On the summary row, usec refers to the total elapsed time to snapshot the procfs/sysfs and collect the counters on all cpus.
 .PP
 \fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected.  This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug".  On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
 .PP
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 1bcecfed721b2..adcf5f0a06334 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -370,6 +370,9 @@ unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
 unsigned int first_counter_read = 1;
+
+static struct timeval procsysfs_tv_begin;
+
 int ignore_stdin;
 bool no_msr;
 bool no_perf;
@@ -3638,7 +3641,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	/* remember first tv_begin */
 	if (average.threads.tv_begin.tv_sec == 0)
-		average.threads.tv_begin = t->tv_begin;
+		average.threads.tv_begin = procsysfs_tv_begin;
 
 	/* remember last tv_end */
 	average.threads.tv_end = t->tv_end;
@@ -5983,6 +5986,8 @@ int snapshot_sys_lpi_us(void)
  */
 int snapshot_proc_sysfs_files(void)
 {
+	gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);
+
 	if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
 		if (snapshot_proc_interrupts())
 			return 1;

From 7c6fee25bdf5c8f8a1bcc6fa3566fffb7fe9eb9a Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Tue, 14 Jan 2025 16:11:28 +0100
Subject: [PATCH 218/368] tools/power turbostat: Check for non-zero value when
 MSR probing

For some MSRs, for example, the Platform Energy Counter (RAPL PSYS), it
is required to additionally check for a non-zero value to confirm that
it is present.

From Intel SDM vol. 4:

    Platform Energy Counter (R/O)
    This MSR is valid only if both platform vendor hardware
    implementation and BIOS enablement support it.
    This MSR will read 0 if not valid.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index adcf5f0a06334..6b72b922e2f5d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2113,13 +2113,17 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 int probe_msr(int cpu, off_t offset)
 {
 	ssize_t retval;
-	unsigned long long dummy;
+	unsigned long long value;
 
 	assert(!no_msr);
 
-	retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+	retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
 
-	if (retval != sizeof(dummy))
+	/*
+	 * Expect MSRs to accumulate some non-zero value since the system was powered on.
+	 * Treat zero as a read failure.
+	 */
+	if (retval != sizeof(value) || value == 0)
 		return 1;
 
 	return 0;

From 34537ddd208d614dbefeb97823ae1c79e7771588 Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Tue, 10 Dec 2024 18:27:38 +0100
Subject: [PATCH 219/368] tools/power turbostat: Return default value for
 unmapped PMT domains

When requesting PMT counters with the --add command, the user may not want
to specify values for every domain (that is, cpu, core, package etc).
For the domains for which the user did not provide information on how to
read the counter, return the default value - zero.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 6b72b922e2f5d..60b1ade8659b5 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4615,7 +4615,8 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
 
 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
 {
-	assert(domain_id < ppmt->num_domains);
+	if (domain_id >= ppmt->num_domains)
+		return 0;
 
 	const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
 	const unsigned long value = pmmio ? *pmmio : 0;

From 089134cb0502ba962bce9402ce96e0875876d401 Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Tue, 10 Dec 2024 11:41:58 +0100
Subject: [PATCH 220/368] tools/power turbostat: Extend PMT identification with
 a sequence number

When platforms expose multiple PMT aggregators with the same GUID, the
only way to identify them and map them to a specific domain is by reading
them in the order they were exposed via PCIe. The Intel PMT kernel driver
keeps the same order and numbers the telemetry directories accordingly.

Use GUID and sequence number (order) to uniquely identify PMT
aggregators.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 60b1ade8659b5..14c4958867463 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1536,6 +1536,7 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = {
 #define PMT_COUNTER_MTL_DC6_LSB    0
 #define PMT_COUNTER_MTL_DC6_MSB    63
 #define PMT_MTL_DC6_GUID           0x1a067102
+#define PMT_MTL_DC6_SEQ            0
 
 #define PMT_COUNTER_NAME_SIZE_BYTES      16
 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32
@@ -9083,7 +9084,7 @@ void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs
 	return ret;
 }
 
-struct pmt_mmio *pmt_add_guid(unsigned int guid)
+struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
 {
 	struct pmt_mmio *ret;
 
@@ -9091,6 +9092,11 @@ struct pmt_mmio *pmt_add_guid(unsigned int guid)
 	if (!ret)
 		ret = pmt_mmio_open(guid);
 
+	while (ret && seq) {
+		ret = ret->next;
+		--seq;
+	}
+
 	return ret;
 }
 
@@ -9137,7 +9143,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio,
 	pcounter->domains[domain_id].pcounter = pmmio;
 }
 
-int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
+int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
 		    unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
 		    enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
 {
@@ -9157,10 +9163,10 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
 		exit(1);
 	}
 
-	mmio = pmt_add_guid(guid);
+	mmio = pmt_add_guid(guid, seq);
 	if (!mmio) {
 		if (mode != PMT_OPEN_TRY) {
-			fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid);
+			fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
 			exit(1);
 		}
 
@@ -9216,9 +9222,9 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
 void pmt_init(void)
 {
 	if (BIC_IS_ENABLED(BIC_Diec6)) {
-		pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB,
-				PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA,
-				0, PMT_OPEN_TRY);
+		pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
+				PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
+				SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
 	}
 }
 
@@ -9699,6 +9705,7 @@ void parse_add_command_pmt(char *add_command)
 	unsigned int lsb;
 	unsigned int msb;
 	unsigned int guid;
+	unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
 	unsigned int domain_id;
 	enum counter_scope scope = 0;
 	enum pmt_datatype type = PMT_TYPE_RAW;
@@ -9778,6 +9785,10 @@ void parse_add_command_pmt(char *add_command)
 			goto next;
 		}
 
+		if (sscanf(add_command, "seq=%x", &seq) == 1) {
+			goto next;
+		}
+
 next:
 		add_command = strchr(add_command, ',');
 		if (add_command) {
@@ -9864,7 +9875,7 @@ void parse_add_command_pmt(char *add_command)
 		exit(1);
 	}
 
-	pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
+	pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
 }
 
 void parse_add_command(char *add_command)

From 4265a86582eaa224d171328be0c71e2a7ccd194f Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Thu, 12 Dec 2024 18:59:25 +0100
Subject: [PATCH 221/368] tools/power turbostat: Add PMT directory iterator
 helper

PMT directories exposed in sysfs use the following pattern:
  telem%u
for example:
  telem0, telem2, telem3, ..., telem15, telem16

This naming scheme preserves the ordering from the PCIe discovery, which
is important to correctly map the telemetry directory to the specific
domain (cpu, core, package etc).

Because readdir() traverses the entries in alphabetical order, causing
for example "telem13" to be traversed before "telem3", it is necessary
to use scandir() with custom compare() callback to preserve the PCIe
ordering.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 87 +++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 14c4958867463..6104d5bcca5c1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1589,6 +1589,93 @@ struct pmt_counter {
 	struct pmt_domain_info *domains;
 };
 
+/*
+ * PMT telemetry directory iterator.
+ * Used to iterate telemetry files in sysfs in correct order.
+ */
+struct pmt_diriter_t
+{
+	DIR *dir;
+	struct dirent **namelist;
+	unsigned int num_names;
+	unsigned int current_name_idx;
+};
+
+int pmt_telemdir_filter(const struct dirent *e)
+{
+	unsigned int dummy;
+	return sscanf(e->d_name, "telem%u", &dummy);
+}
+
+int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
+{
+	unsigned int aidx = 0, bidx = 0;
+
+	sscanf((*a)->d_name, "telem%u", &aidx);
+	sscanf((*b)->d_name, "telem%u", &bidx);
+
+	return aidx >= bidx;
+}
+
+const struct dirent* pmt_diriter_next(struct pmt_diriter_t *iter)
+{
+	const struct dirent *ret = NULL;
+
+	if (!iter->dir)
+		return NULL;
+
+	if (iter->current_name_idx >= iter->num_names)
+		return NULL;
+
+	ret = iter->namelist[iter->current_name_idx];
+	++iter->current_name_idx;
+
+	return ret;
+}
+
+const struct dirent* pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
+{
+	int num_names = iter->num_names;
+
+	if (!iter->dir) {
+		iter->dir = opendir(pmt_root_path);
+		if (iter->dir == NULL)
+			return NULL;
+
+		num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
+		if (num_names == -1)
+			return NULL;
+	}
+
+	iter->current_name_idx = 0;
+	iter->num_names = num_names;
+
+	return pmt_diriter_next(iter);
+}
+
+void pmt_diriter_init(struct pmt_diriter_t *iter)
+{
+	memset(iter, 0, sizeof(*iter));
+}
+
+void pmt_diriter_remove(struct pmt_diriter_t *iter)
+{
+	if (iter->namelist) {
+		for (unsigned int i = 0; i < iter->num_names; i++) {
+			free(iter->namelist[i]);
+			iter->namelist[i] = NULL;
+		}
+	}
+
+	free(iter->namelist);
+	iter->namelist = NULL;
+	iter->num_names = 0;
+	iter->current_name_idx = 0;
+
+	closedir(iter->dir);
+	iter->dir = NULL;
+}
+
 unsigned int pmt_counter_get_width(const struct pmt_counter *p)
 {
 	return (p->msb - p->lsb) + 1;

From 16ce467875ef8572b82f9af30fcf7b2f65fc2e95 Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Fri, 6 Dec 2024 11:22:00 +0100
Subject: [PATCH 222/368] tools/power turbostat: Allow mapping multiple PMT
 files with the same GUID

Some platforms may expose multiple telemetry files identified with the
same GUID. Interpreting it correctly, to associate given counter with a
CPU, core or a package requires more metadata from the user.

Parse them and create an ordered linked list of those PMT aggregators, so
that we can identify a specific aggregator by GUID + sequence number.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 75 ++++++++++++++-------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 6104d5bcca5c1..f76e1de3f9687 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -9033,46 +9033,35 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for
 
 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
 {
-	DIR *dirp;
-	struct dirent *entry;
+	struct pmt_diriter_t pmt_iter;
+	const struct dirent *entry;
 	struct stat st;
-	unsigned int telem_idx;
 	int fd_telem_dir, fd_pmt;
 	unsigned long guid, size, offset;
 	size_t mmap_size;
 	void *mmio;
-	struct pmt_mmio *ret = NULL;
+	struct pmt_mmio *head = NULL, *last = NULL;
+	struct pmt_mmio *new_pmt = NULL;
 
 	if (stat(SYSFS_TELEM_PATH, &st) == -1)
 		return NULL;
 
-	dirp = opendir(SYSFS_TELEM_PATH);
-	if (dirp == NULL)
+	pmt_diriter_init(&pmt_iter);
+	entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
+	if (!entry) {
+		pmt_diriter_remove(&pmt_iter);
 		return NULL;
+	}
 
-	for (;;) {
-		entry = readdir(dirp);
-
-		if (entry == NULL)
-			break;
-
-		if (strcmp(entry->d_name, ".") == 0)
-			continue;
-
-		if (strcmp(entry->d_name, "..") == 0)
-			continue;
-
-		if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1)
-			continue;
-
-		if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) {
+	for (;entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
+		if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) {
 			break;
 		}
 
 		if (!S_ISDIR(st.st_mode))
 			continue;
 
-		fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY);
+		fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
 		if (fd_telem_dir == -1) {
 			break;
 		}
@@ -9106,35 +9095,51 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
 		mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
 		mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
 		if (mmio != MAP_FAILED) {
-
 			if (debug)
 				fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);
 
-			ret = calloc(1, sizeof(*ret));
+			new_pmt = calloc(1, sizeof(*new_pmt));
 
-			if (!ret) {
+			if (!new_pmt) {
 				fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
 				exit(1);
 			}
 
-			ret->guid = guid;
-			ret->mmio_base = mmio;
-			ret->pmt_offset = offset;
-			ret->size = size;
+			/*
+			 * Create linked list of mmaped regions,
+			 * but preserve the ordering from sysfs.
+			 * Ordering is important for the user to
+			 * use the seq=%u parameter when adding a counter.
+			 */
+			new_pmt->guid = guid;
+			new_pmt->mmio_base = mmio;
+			new_pmt->pmt_offset = offset;
+			new_pmt->size = size;
+			new_pmt->next = pmt_mmios;
+
+			if (last)
+				last->next = new_pmt;
+			else
+				head = new_pmt;
 
-			ret->next = pmt_mmios;
-			pmt_mmios = ret;
+			last = new_pmt;
 		}
 
 loop_cleanup_and_break:
 		close(fd_pmt);
 		close(fd_telem_dir);
-		break;
 	}
 
-	closedir(dirp);
+	pmt_diriter_remove(&pmt_iter);
 
-	return ret;
+	/*
+	 * If we found something, stick just
+	 * created linked list to the front.
+	 */
+	if (head)
+		pmt_mmios = head;
+
+	return head;
 }
 
 struct pmt_mmio *pmt_mmio_find(unsigned int guid)

From 83fbeb9f9776cd044d36af606127f56206337bab Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Thu, 12 Dec 2024 19:11:34 +0100
Subject: [PATCH 223/368] tools/power turbostat: Allow adding PMT counters
 directly by sysfs path

Allow user to add PMT counters by either identifying the source with:
  guid=%u,seq=%u
or, since this patch, with direct sysfs path:
  path=%s, for example path=/sys/class/intel_pmt/telem5

In the latter case, the guid and sequence number will be inferred
by turbostat.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 107 +++++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index f76e1de3f9687..0f2475fa9fa4e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -9788,11 +9788,96 @@ bool starts_with(const char *str, const char *prefix)
 	return strncmp(prefix, str, strlen(prefix)) == 0;
 }
 
+int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
+{
+	struct pmt_diriter_t pmt_iter;
+	const struct dirent *dirname;
+	struct stat stat, target_stat;
+	int fd_telem_dir = -1;
+	int fd_target_dir;
+	unsigned int seq = 0;
+	unsigned long guid, target_guid;
+	int ret = -1;
+
+	fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
+	if (fd_target_dir == -1) {
+		return -1;
+	}
+
+	if (fstat(fd_target_dir, &target_stat) == -1) {
+		fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
+		exit(1);
+	}
+
+	if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
+		fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
+		exit(1);
+	}
+
+	close(fd_target_dir);
+
+	pmt_diriter_init(&pmt_iter);
+
+	for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
+	     dirname = pmt_diriter_next(&pmt_iter)) {
+
+		fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
+		if (fd_telem_dir == -1) {
+			continue;
+		}
+
+		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
+			fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
+			continue;
+		}
+
+		if (fstat(fd_telem_dir, &stat) == -1) {
+			fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
+				dirname->d_name, strerror(errno));
+			continue;
+		}
+
+		/*
+		 * If reached the same directory as target, exit the loop.
+		 * Seq has the correct value now.
+		 */
+		if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
+			ret = 0;
+			break;
+		}
+
+		/*
+		 * If reached directory with the same guid,
+		 * but it's not the target directory yet,
+		 * increment seq and continue the search.
+		 */
+		if (guid == target_guid)
+			++seq;
+
+		close(fd_telem_dir);
+		fd_telem_dir = -1;
+	}
+
+	pmt_diriter_remove(&pmt_iter);
+
+	if (fd_telem_dir != -1)
+		close(fd_telem_dir);
+
+	if (!ret) {
+		*out_guid = target_guid;
+		*out_seq = seq;
+	}
+
+	return ret;
+}
+
 void parse_add_command_pmt(char *add_command)
 {
 	char *name = NULL;
 	char *type_name = NULL;
 	char *format_name = NULL;
+	char *direct_path = NULL;
+	static const char direct_path_prefix[] = "path=";
 	unsigned int offset;
 	unsigned int lsb;
 	unsigned int msb;
@@ -9881,6 +9966,10 @@ void parse_add_command_pmt(char *add_command)
 			goto next;
 		}
 
+		if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
+			direct_path = add_command + strlen(direct_path_prefix);
+			goto next;
+		}
 next:
 		add_command = strchr(add_command, ',');
 		if (add_command) {
@@ -9952,8 +10041,24 @@ void parse_add_command_pmt(char *add_command)
 		exit(1);
 	}
 
+	if (direct_path && has_guid) {
+		printf("%s: path and guid+seq parameters are mutually exclusive\n"
+		       "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
+		exit(1);
+	}
+
+	if (direct_path) {
+		if (pmt_parse_from_path(direct_path, &guid, &seq)) {
+			printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
+			exit(1);
+		}
+
+		/* GUID was just infered from the direct path. */
+		has_guid = true;
+	}
+
 	if (!has_guid) {
-		printf("%s: missing %s\n", __func__, "guid");
+		printf("%s: missing %s\n", __func__, "guid or path");
 		exit(1);
 	}
 

From a80e53472209b1c749e02e91ac62c053ac457099 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 3 Dec 2024 16:11:21 -0500
Subject: [PATCH 224/368] tools/power turbostat: version 2025.01.14

Fix checkpatch whitespace issues since 2024.11.30

Summary of Changes since 2024.11.30:

	Enable SysWatt by default.

	Add initial PTL, CWF platform support.

	Refuse to run on unsupported platforms without --force
	to avoid not-so-useful measurements mistakenly made
	using obsolete versions.

	Harden initial PMT code in response to early use.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 36 ++++++++++++---------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0f2475fa9fa4e..76d2632e60ac6 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel and AMD processors.
  *
- * Copyright (c) 2024 Intel Corporation.
+ * Copyright (c) 2025 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  */
 
@@ -271,11 +271,11 @@ struct msr_counter bic[] = {
 #define	BIC_Sys_J		(1ULL << 60)
 #define	BIC_NMI			(1ULL << 61)
 
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
 #define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
-#define BIC_OTHER ( BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
 #define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
@@ -1593,8 +1593,7 @@ struct pmt_counter {
  * PMT telemetry directory iterator.
  * Used to iterate telemetry files in sysfs in correct order.
  */
-struct pmt_diriter_t
-{
+struct pmt_diriter_t {
 	DIR *dir;
 	struct dirent **namelist;
 	unsigned int num_names;
@@ -1604,6 +1603,7 @@ struct pmt_diriter_t
 int pmt_telemdir_filter(const struct dirent *e)
 {
 	unsigned int dummy;
+
 	return sscanf(e->d_name, "telem%u", &dummy);
 }
 
@@ -1617,7 +1617,7 @@ int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
 	return aidx >= bidx;
 }
 
-const struct dirent* pmt_diriter_next(struct pmt_diriter_t *iter)
+const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
 {
 	const struct dirent *ret = NULL;
 
@@ -1633,7 +1633,7 @@ const struct dirent* pmt_diriter_next(struct pmt_diriter_t *iter)
 	return ret;
 }
 
-const struct dirent* pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
+const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
 {
 	int num_names = iter->num_names;
 
@@ -2302,7 +2302,7 @@ void help(void)
 		"  -h, --help\n"
 		"		print this help message\n"
 		"  -v, --version\n"
-		"		print version information\n" "\n" "For more help, run \"man turbostat\"\n");
+		"		print version information\n\nFor more help, run \"man turbostat\"\n");
 }
 
 /*
@@ -9053,18 +9053,16 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
 		return NULL;
 	}
 
-	for (;entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
-		if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) {
+	for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
+		if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
 			break;
-		}
 
 		if (!S_ISDIR(st.st_mode))
 			continue;
 
 		fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
-		if (fd_telem_dir == -1) {
+		if (fd_telem_dir == -1)
 			break;
-		}
 
 		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
 			close(fd_telem_dir);
@@ -9425,7 +9423,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-	fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n");
+	fprintf(outf, "turbostat version 2025.01.14 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -9750,7 +9748,7 @@ void parse_add_command_msr(char *add_command)
 
 	}
 	if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
-		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n");
+		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
 		fail++;
 	}
 
@@ -9822,9 +9820,8 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne
 	     dirname = pmt_diriter_next(&pmt_iter)) {
 
 		fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
-		if (fd_telem_dir == -1) {
+		if (fd_telem_dir == -1)
 			continue;
-		}
 
 		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
 			fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
@@ -9962,9 +9959,8 @@ void parse_add_command_pmt(char *add_command)
 			goto next;
 		}
 
-		if (sscanf(add_command, "seq=%x", &seq) == 1) {
+		if (sscanf(add_command, "seq=%x", &seq) == 1)
 			goto next;
-		}
 
 		if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
 			direct_path = add_command + strlen(direct_path_prefix);

From 1a202afeaa370970413846c2cb09b383875e753c Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Fri, 17 Jan 2025 13:36:59 +0100
Subject: [PATCH 225/368] tools/power turbostat: Add tcore clock PMT type

Some PMT counters, for example module c1e residency on Intel Clearwater
Forest, are reported using tcore clock type.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 32 ++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 76d2632e60ac6..ecaa4e0fb2c0d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1538,6 +1538,8 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = {
 #define PMT_MTL_DC6_GUID           0x1a067102
 #define PMT_MTL_DC6_SEQ            0
 
+unsigned long long tcore_clock_freq_hz = 800000000;
+
 #define PMT_COUNTER_NAME_SIZE_BYTES      16
 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32
 
@@ -1560,6 +1562,7 @@ struct pmt_mmio {
 enum pmt_datatype {
 	PMT_TYPE_RAW,
 	PMT_TYPE_XTAL_TIME,
+	PMT_TYPE_TCORE_CLOCK,
 };
 
 struct pmt_domain_info {
@@ -2474,6 +2477,7 @@ void print_header(char *delim)
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+		case PMT_TYPE_TCORE_CLOCK:
 			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
 			break;
 		}
@@ -2548,6 +2552,7 @@ void print_header(char *delim)
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+		case PMT_TYPE_TCORE_CLOCK:
 			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
 			break;
 		}
@@ -2679,6 +2684,7 @@ void print_header(char *delim)
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+		case PMT_TYPE_TCORE_CLOCK:
 			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
 			break;
 		}
@@ -2997,7 +3003,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
 		const unsigned long value_raw = t->pmt_counter[i];
-		const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+		double value_converted;
 		switch (ppmt->type) {
 		case PMT_TYPE_RAW:
 			if (pmt_counter_get_width(ppmt) <= 32)
@@ -3009,8 +3015,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 			break;
+
+		case PMT_TYPE_TCORE_CLOCK:
+			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 		}
 	}
 
@@ -3077,7 +3088,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
 		const unsigned long value_raw = c->pmt_counter[i];
-		const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+		double value_converted;
 		switch (ppmt->type) {
 		case PMT_TYPE_RAW:
 			if (pmt_counter_get_width(ppmt) <= 32)
@@ -3089,8 +3100,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 			break;
+
+		case PMT_TYPE_TCORE_CLOCK:
+			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 		}
 	}
 
@@ -3275,7 +3291,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
 		const unsigned long value_raw = p->pmt_counter[i];
-		const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+		double value_converted;
 		switch (ppmt->type) {
 		case PMT_TYPE_RAW:
 			if (pmt_counter_get_width(ppmt) <= 32)
@@ -3287,8 +3303,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 			break;
+
+		case PMT_TYPE_TCORE_CLOCK:
+			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 		}
 	}
 
@@ -10016,6 +10037,11 @@ void parse_add_command_pmt(char *add_command)
 			has_type = true;
 		}
 
+		if (strcmp("tcore_clock", type_name) == 0) {
+			type = PMT_TYPE_TCORE_CLOCK;
+			has_type = true;
+		}
+
 		if (!has_type) {
 			printf("%s: invalid %s: %s\n", __func__, "type", type_name);
 			exit(1);

From 698244bbb3bfd32ddf9a0b70a12b1c7d69056497 Mon Sep 17 00:00:00 2001
From: Nick Chan <towinchenmi@gmail.com>
Date: Sun, 19 Jan 2025 00:31:42 +0800
Subject: [PATCH 226/368] irqchip/apple-aic: Only handle PMC interrupt as FIQ
 when configured so

The CPU PMU in Apple SoCs can be configured to fire its interrupt in one of
several ways, and since Apple A11 one of the methods is FIQ, but the check
of the configuration register fails to test explicitly for FIQ mode. It
tests whether the IMODE bitfield is zero or not and the PMCR0_IACT bit is
set. That results in false positives when the IMODE bitfield is not zero,
but does not have the mode PMCR0_IMODE_FIQ.

Only handle the PMC interrupt as a FIQ when the CPU PMU has been configured
to fire FIQs, i.e. the IMODE bitfield value is PMCR0_IMODE_FIQ and
PMCR0_IACT is set.

Fixes: c7708816c944 ("irqchip/apple-aic: Wire PMU interrupts")
Signed-off-by: Nick Chan <towinchenmi@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250118163554.16733-1-towinchenmi@gmail.com
---
 drivers/irqchip/irq-apple-aic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index da5250f0155cf..2b1684c60e3ca 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 						  AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT));
 	}
 
-	if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) {
+	if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
+			(FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
 		int irq;
 		if (cpumask_test_cpu(smp_processor_id(),
 				     &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff))

From d555ed45a5a10a813528c7685f432369d536ae3d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 31 Oct 2024 14:42:56 +0100
Subject: [PATCH 227/368] PCI: Restore original INTX_DISABLE bit by pcim_intx()

pcim_intx() tries to restore the INTx bit at removal via devres, but there
is a chance that it restores a wrong value.

Because the value to be restored is blindly assumed to be the negation of
the enable argument, when a driver calls pcim_intx() unnecessarily for the
already enabled state, it'll restore to the disabled state in turn.  That
is, the function assumes the case like:

  // INTx == 1
  pcim_intx(pdev, 0); // old INTx value assumed to be 1 -> correct

but it might be like the following, too:

  // INTx == 0
  pcim_intx(pdev, 0); // old INTx value assumed to be 1 -> wrong

Also, when a driver calls pcim_intx() multiple times with different enable
argument values, the last one will win no matter what value it is.  This
can lead to inconsistency, e.g.

  // INTx == 1
  pcim_intx(pdev, 0); // OK
  ...
  pcim_intx(pdev, 1); // now old INTx wrongly assumed to be 0

This patch addresses those inconsistencies by saving the original INTx
state at the first pcim_intx() call.  For that, get_or_create_intx_devres()
is folded into pcim_intx() caller side; it allows us to simply check the
already allocated devres and record the original INTx along with the
devres_alloc() call.

Link: https://lore.kernel.org/r/20241031134300.10296-1-tiwai@suse.de
Fixes: 25216afc9db5 ("PCI: Add managed pcim_intx()")
Link: https://lore.kernel.org/87v7xk2ps5.wl-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Philipp Stanner <pstanner@redhat.com>
Cc: stable@vger.kernel.org	# v6.11+
---
 drivers/pci/devres.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c
index d1d97a4bb36d3..3431a7df3e0d9 100644
--- a/drivers/pci/devres.c
+++ b/drivers/pci/devres.c
@@ -419,19 +419,12 @@ static void pcim_intx_restore(struct device *dev, void *data)
 	pci_intx(pdev, res->orig_intx);
 }
 
-static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
+static void save_orig_intx(struct pci_dev *pdev, struct pcim_intx_devres *res)
 {
-	struct pcim_intx_devres *res;
-
-	res = devres_find(dev, pcim_intx_restore, NULL, NULL);
-	if (res)
-		return res;
+	u16 pci_command;
 
-	res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL);
-	if (res)
-		devres_add(dev, res);
-
-	return res;
+	pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+	res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE);
 }
 
 /**
@@ -447,12 +440,23 @@ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
 int pcim_intx(struct pci_dev *pdev, int enable)
 {
 	struct pcim_intx_devres *res;
+	struct device *dev = &pdev->dev;
 
-	res = get_or_create_intx_devres(&pdev->dev);
-	if (!res)
-		return -ENOMEM;
+	/*
+	 * pcim_intx() must only restore the INTx value that existed before the
+	 * driver was loaded, i.e., before it called pcim_intx() for the
+	 * first time.
+	 */
+	res = devres_find(dev, pcim_intx_restore, NULL, NULL);
+	if (!res) {
+		res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL);
+		if (!res)
+			return -ENOMEM;
+
+		save_orig_intx(pdev, res);
+		devres_add(dev, res);
+	}
 
-	res->orig_intx = !enable;
 	pci_intx(pdev, enable);
 
 	return 0;

From 38567b972a22706e9a1a52b2c4bc9ea4b5ed00ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Wed, 15 Jan 2025 15:47:50 +0100
Subject: [PATCH 228/368] selftests: Handle old glibc without execveat(2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an execveat(2) wrapper because glibc < 2.34 does not have one.  This
fixes the check-exec tests and samples.

Cc: Günther Noack <gnoack@google.com>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Kees Cook <kees@kernel.org>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Roberto Sassu <roberto.sassu@huawei.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Stefan Berger <stefanb@linux.ibm.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Closes: https://lore.kernel.org/r/20250114205645.GA2825031@ax162
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Reviewed-by: Günther Noack <gnoack3000@gmail.com>
Link: https://lore.kernel.org/r/20250115144753.311152-1-mic@digikod.net
Signed-off-by: Kees Cook <kees@kernel.org>
---
 samples/check-exec/inc.c                   | 11 +++++++++--
 tools/testing/selftests/exec/check-exec.c  | 11 +++++++++--
 tools/testing/selftests/landlock/fs_test.c | 10 ++++++++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c
index 94b87569d2a2e..7f6ef06a2f067 100644
--- a/samples/check-exec/inc.c
+++ b/samples/check-exec/inc.c
@@ -21,8 +21,15 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/prctl.h>
+#include <sys/syscall.h>
 #include <unistd.h>
 
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+			char *const envp[], int flags)
+{
+	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
 /* Returns 1 on error, 0 otherwise. */
 static int interpret_buffer(char *buffer, size_t buffer_size)
 {
@@ -78,8 +85,8 @@ static int interpret_stream(FILE *script, char *const script_name,
 	 * script execution.  We must use the script file descriptor instead of
 	 * the script path name to avoid race conditions.
 	 */
-	err = execveat(fileno(script), "", script_argv, envp,
-		       AT_EMPTY_PATH | AT_EXECVE_CHECK);
+	err = sys_execveat(fileno(script), "", script_argv, envp,
+			   AT_EMPTY_PATH | AT_EXECVE_CHECK);
 	if (err && restrict_stream) {
 		perror("ERROR: Script execution check");
 		return 1;
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
index 4d3f4525e1e1c..55bce47e56b73 100644
--- a/tools/testing/selftests/exec/check-exec.c
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -22,6 +22,7 @@
 #include <sys/prctl.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 #include <sys/sysmacros.h>
 #include <unistd.h>
 
@@ -31,6 +32,12 @@
 
 #include "../kselftest_harness.h"
 
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+			char *const envp[], int flags)
+{
+	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
 static void drop_privileges(struct __test_metadata *const _metadata)
 {
 	const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
@@ -219,8 +226,8 @@ static void test_exec_fd(struct __test_metadata *_metadata, const int fd,
 	 * test framework as an error.  With AT_EXECVE_CHECK, we only check a
 	 * potential successful execution.
 	 */
-	access_ret =
-		execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK);
+	access_ret = sys_execveat(fd, "", argv, NULL,
+				  AT_EMPTY_PATH | AT_EXECVE_CHECK);
 	access_errno = errno;
 	if (err_code) {
 		EXPECT_EQ(-1, access_ret);
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index cd66901be612b..ac9701c018e0d 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -59,6 +59,12 @@ int open_tree(int dfd, const char *filename, unsigned int flags)
 }
 #endif
 
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+			char *const envp[], int flags)
+{
+	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
 #ifndef RENAME_EXCHANGE
 #define RENAME_EXCHANGE (1 << 1)
 #endif
@@ -2018,8 +2024,8 @@ static void test_check_exec(struct __test_metadata *const _metadata,
 	int ret;
 	char *const argv[] = { (char *)path, NULL };
 
-	ret = execveat(AT_FDCWD, path, argv, NULL,
-		       AT_EMPTY_PATH | AT_EXECVE_CHECK);
+	ret = sys_execveat(AT_FDCWD, path, argv, NULL,
+			   AT_EMPTY_PATH | AT_EXECVE_CHECK);
 	if (err) {
 		EXPECT_EQ(-1, ret);
 		EXPECT_EQ(errno, err);

From b32c36975da48afc9089f8b61f7b2dcc40e479d2 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 27 Jan 2025 16:42:19 -0600
Subject: [PATCH 229/368] tools/power turbostat: Fix forked child affinity
 regression

In "one-shot" mode, turbostat
1. takes a counter snapshot
2. forks and waits for a child
3. takes the end counter snapshot and prints the result.

But turbostat counter snapshots currently use affinity to travel
around the system so that counter reads are "local", and this
affinity must be cleared between #1 and #2 above.

The offending commit removed the reset that allowed the child
to run on cpu_present_set.

Fix that issue, and improve upon the original by using
cpu_possible_set for the child.  This allows the child
to also run on CPUs that hotplug online during its runtime.

Reported-by: Zhang Rui <rui.zhang@intel.com>
Fixes: 7bb3fe27ad4f ("tools/power/turbostat: Obey allowed CPUs during startup")
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 54 ++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ecaa4e0fb2c0d..1f188a0908da6 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1120,8 +1120,8 @@ int backwards_count;
 char *progname;
 
 #define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define MAX_ADDED_CORE_COUNTERS 8
 #define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -8488,6 +8488,33 @@ int dir_filter(const struct dirent *dirp)
 		return 0;
 }
 
+char *possible_file = "/sys/devices/system/cpu/possible";
+char possible_buf[1024];
+
+int initialize_cpu_possible_set(void)
+{
+	FILE *fp;
+
+	fp = fopen(possible_file, "r");
+	if (!fp) {
+		warn("open %s", possible_file);
+		return -1;
+	}
+	if (fread(possible_buf, sizeof(char), 1024, fp) == 0) {
+		warn("read %s", possible_file);
+		goto err;
+	}
+	if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) {
+		warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str);
+		goto err;
+	}
+	return 0;
+
+err:
+	fclose(fp);
+	return -1;
+}
+
 void topology_probe(bool startup)
 {
 	int i;
@@ -8519,6 +8546,16 @@ void topology_probe(bool startup)
 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
 	for_all_proc_cpus(mark_cpu_present);
 
+	/*
+	 * Allocate and initialize cpu_possible_set
+	 */
+	cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1));
+	if (cpu_possible_set == NULL)
+		err(3, "CPU_ALLOC");
+	cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+	CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set);
+	initialize_cpu_possible_set();
+
 	/*
 	 * Allocate and initialize cpu_effective_set
 	 */
@@ -9371,6 +9408,18 @@ void turbostat_init()
 	}
 }
 
+void affinitize_child(void)
+{
+	/* Prefer cpu_possible_set, if available */
+	if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
+		warn("sched_setaffinity cpu_possible_set");
+
+		/* Otherwise, allow child to run on same cpu set as turbostat */
+		if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
+			warn("sched_setaffinity cpu_allowed_set");
+	}
+}
+
 int fork_it(char **argv)
 {
 	pid_t child_pid;
@@ -9386,6 +9435,7 @@ int fork_it(char **argv)
 	child_pid = fork();
 	if (!child_pid) {
 		/* child */
+		affinitize_child();
 		execvp(argv[0], argv);
 		err(errno, "exec %s", argv[0]);
 	} else {

From bde4ccfd5ab5361490514fc4af7497989cfbee17 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 23 Jan 2025 20:38:56 -0800
Subject: [PATCH 230/368] perf annotate: Use an array for the disassembler
 preference

Prior to this change a string was used which could cause issues with
an unrecognized disassembler in symbol__disassembler. Change to
initializing an array of perf_disassembler enum values. If a value
already exists then adding it a second time is ignored, which avoids the
array out-of-bounds problems present in the previous code. It also allows a
statically sized array and removes the need for memory allocation. Errors in
the disassembler string are reported when the config is parsed during
perf annotate or perf top start up. If the array is uninitialized
after processing the config file the default llvm, capstone then
objdump values are added but without a need to parse a string.

Fixes: a6e8a58de629 ("perf disasm: Allow configuring what disassemblers to use")
Closes: https://lore.kernel.org/lkml/CAP-5=fUdfCyxmEiTpzS2uumUp3-SyQOseX2xZo81-dQtWXj6vA@mail.gmail.com/
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20250124043856.1177264-1-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate.c | 76 +++++++++++++++++++++++++++++++---
 tools/perf/util/annotate.h | 15 ++++---
 tools/perf/util/disasm.c   | 83 +++++++-------------------------------
 3 files changed, 96 insertions(+), 78 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0d2ea22bd9e48..31bb326b07a68 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
 	return 0;
 }
 
+const char * const perf_disassembler__strs[] = {
+	[PERF_DISASM_UNKNOWN]  = "unknown",
+	[PERF_DISASM_LLVM]     = "llvm",
+	[PERF_DISASM_CAPSTONE] = "capstone",
+	[PERF_DISASM_OBJDUMP]  = "objdump",
+};
+
+
+static void annotation_options__add_disassembler(struct annotation_options *options,
+						 enum perf_disassembler dis)
+{
+	for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) {
+		if (options->disassemblers[i] == dis) {
+			/* Disassembler is already present then don't add again. */
+			return;
+		}
+		if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) {
+			/* Found a free slot. */
+			options->disassemblers[i] = dis;
+			return;
+		}
+	}
+	pr_err("Failed to add disassembler %d\n", dis);
+}
+
+static int annotation_options__add_disassemblers_str(struct annotation_options *options,
+						const char *str)
+{
+	while (str && *str != '\0') {
+		const char *comma = strchr(str, ',');
+		int len = comma ? comma - str : (int)strlen(str);
+		bool match = false;
+
+		for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) {
+			const char *dis_str = perf_disassembler__strs[i];
+
+			if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) {
+				annotation_options__add_disassembler(options, i);
+				match = true;
+				break;
+			}
+		}
+		if (!match) {
+			pr_err("Invalid disassembler '%.*s'\n", len, str);
+			return -1;
+		}
+		str = comma ? comma + 1 : NULL;
+	}
+	return 0;
+}
+
 static int annotation__config(const char *var, const char *value, void *data)
 {
 	struct annotation_options *opt = data;
@@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data)
 		else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
 			opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
 	} else if (!strcmp(var, "annotate.disassemblers")) {
-		opt->disassemblers_str = strdup(value);
-		if (!opt->disassemblers_str) {
-			pr_err("Not enough memory for annotate.disassemblers\n");
-			return -1;
-		}
+		int err = annotation_options__add_disassemblers_str(opt, value);
+
+		if (err)
+			return err;
 	} else if (!strcmp(var, "annotate.hide_src_code")) {
 		opt->hide_src_code = perf_config_bool("hide_src_code", value);
 	} else if (!strcmp(var, "annotate.jump_arrows")) {
@@ -2185,9 +2235,25 @@ void annotation_options__exit(void)
 	zfree(&annotate_opts.objdump_path);
 }
 
+static void annotation_options__default_init_disassemblers(struct annotation_options *options)
+{
+	if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) {
+		/* Already initialized. */
+		return;
+	}
+#ifdef HAVE_LIBLLVM_SUPPORT
+	annotation_options__add_disassembler(options, PERF_DISASM_LLVM);
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+	annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE);
+#endif
+	annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP);
+}
+
 void annotation_config__init(void)
 {
 	perf_config(annotation__config, &annotate_opts);
+	annotation_options__default_init_disassemblers(&annotate_opts);
 }
 
 static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 0ba5846dad4de..98db1b88daf43 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -34,8 +34,13 @@ struct annotated_data_type;
 #define ANNOTATION__BR_CNTR_WIDTH 30
 #define ANNOTATION_DUMMY_LEN	256
 
-// llvm, capstone, objdump
-#define MAX_DISASSEMBLERS 3
+enum perf_disassembler {
+	PERF_DISASM_UNKNOWN = 0,
+	PERF_DISASM_LLVM,
+	PERF_DISASM_CAPSTONE,
+	PERF_DISASM_OBJDUMP,
+};
+#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1)
 
 struct annotation_options {
 	bool hide_src_code,
@@ -52,14 +57,12 @@ struct annotation_options {
 	     annotate_src,
 	     full_addr;
 	u8   offset_level;
-	u8   nr_disassemblers;
+	u8   disassemblers[MAX_DISASSEMBLERS];
 	int  min_pcnt;
 	int  max_lines;
 	int  context;
 	char *objdump_path;
 	char *disassembler_style;
-	const char *disassemblers_str;
-	const char *disassemblers[MAX_DISASSEMBLERS];
 	const char *prefix;
 	const char *prefix_strip;
 	unsigned int percent_type;
@@ -134,6 +137,8 @@ struct disasm_line {
 	struct annotation_line	 al;
 };
 
+extern const char * const perf_disassembler__strs[];
+
 void annotation_line__add(struct annotation_line *al, struct list_head *head);
 
 static inline double annotation_data__percent(struct annotation_data *data,
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index b7de4d9fd0045..50c5c206b70e7 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -2216,56 +2216,6 @@ static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
 	return err;
 }
 
-static int annotation_options__init_disassemblers(struct annotation_options *options)
-{
-	char *disassembler;
-
-	if (options->disassemblers_str == NULL) {
-		const char *default_disassemblers_str =
-#ifdef HAVE_LIBLLVM_SUPPORT
-				"llvm,"
-#endif
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-				"capstone,"
-#endif
-				"objdump";
-
-		options->disassemblers_str = strdup(default_disassemblers_str);
-		if (!options->disassemblers_str)
-			goto out_enomem;
-	}
-
-	disassembler = strdup(options->disassemblers_str);
-	if (disassembler == NULL)
-		goto out_enomem;
-
-	while (1) {
-		char *comma = strchr(disassembler, ',');
-
-		if (comma != NULL)
-			*comma = '\0';
-
-		options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
-
-		if (comma == NULL)
-			break;
-
-		disassembler = comma + 1;
-
-		if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
-			pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
-				 MAX_DISASSEMBLERS, disassembler);
-			break;
-		}
-	}
-
-	return 0;
-
-out_enomem:
-	pr_err("Not enough memory for annotate.disassemblers\n");
-	return -1;
-}
-
 int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 {
 	struct annotation_options *options = args->options;
@@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	char symfs_filename[PATH_MAX];
 	bool delete_extract = false;
 	struct kcore_extract kce;
-	const char *disassembler;
 	bool decomp = false;
 	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
 
@@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		}
 	}
 
-	err = annotation_options__init_disassemblers(options);
-	if (err)
-		goto out_remove_tmp;
-
 	err = -1;
+	for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
+		enum perf_disassembler dis = options->disassemblers[i];
 
-	for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
-		disassembler = options->disassemblers[i];
-
-		if (!strcmp(disassembler, "llvm"))
+		switch (dis) {
+		case PERF_DISASM_LLVM:
 			err = symbol__disassemble_llvm(symfs_filename, sym, args);
-		else if (!strcmp(disassembler, "capstone"))
+			break;
+		case PERF_DISASM_CAPSTONE:
 			err = symbol__disassemble_capstone(symfs_filename, sym, args);
-		else if (!strcmp(disassembler, "objdump"))
+			break;
+		case PERF_DISASM_OBJDUMP:
 			err = symbol__disassemble_objdump(symfs_filename, sym, args);
-		else
-			pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
-	}
-
-	if (err == 0) {
-		pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
-			 disassembler, options->disassemblers_str);
+			break;
+		case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
+		default:
+			goto out_remove_tmp;
+		}
+		if (err == 0)
+			pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]);
 	}
 out_remove_tmp:
 	if (decomp)

From c1feab95e0b2e9fce7e4f4b2739baf40d84543af Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 23 Jan 2025 22:51:04 -0500
Subject: [PATCH 231/368] add a string-to-qstr constructor

Quite a few places want to build a struct qstr by given string;
it would be convenient to have a primitive doing that, rather
than open-coding it via QSTR_INIT().

The closest approximation was in bcachefs, but that expands to
initializer list - {.len = strlen(string), .name = string}.
It would be more useful to have it as compound literal -
(struct qstr){.len = strlen(string), .name = string}.

Unlike initializer list it's a valid expression.  What's more,
it's a valid lvalue - it's an equivalent of anonymous local
variable with such initializer, so the things like
	path->dentry = d_alloc_pseudo(mnt->mnt_sb, &QSTR(name));
are valid.  It can also be used as initializer, with identical
effect -
	struct qstr x = (struct qstr){.name = s, .len = strlen(s)};
is equivalent to
	struct qstr anon_variable = {.name = s, .len = strlen(s)};
	struct qstr x = anon_variable;
	// anon_variable is never used after that point
and any even remotely sane compiler will manage to collapse that
into
	struct qstr x = {.name = s, .len = strlen(s)};

What compound literals can't be used for is initialization of
global variables, but those are covered by QSTR_INIT().

This commit lifts definition(s) of QSTR() into linux/dcache.h,
converts it to compound literal (all bcachefs users are fine
with that) and converts assorted open-coded instances to using
that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c       |  4 ++--
 fs/bcachefs/fsck.c     |  2 +-
 fs/bcachefs/recovery.c |  2 --
 fs/bcachefs/util.h     |  2 --
 fs/erofs/xattr.c       |  2 +-
 fs/file_table.c        |  4 +---
 fs/kernfs/file.c       |  2 +-
 include/linux/dcache.h |  1 +
 mm/secretmem.c         |  3 +--
 net/sunrpc/rpc_pipe.c  | 14 +++++---------
 10 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 42bd1cb7c9cdd..583ac81669c24 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -60,14 +60,14 @@ static struct inode *anon_inode_make_secure_inode(
 	const struct inode *context_inode)
 {
 	struct inode *inode;
-	const struct qstr qname = QSTR_INIT(name, strlen(name));
 	int error;
 
 	inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
 	if (IS_ERR(inode))
 		return inode;
 	inode->i_flags &= ~S_PRIVATE;
-	error =	security_inode_init_security_anon(inode, &qname, context_inode);
+	error =	security_inode_init_security_anon(inode, &QSTR(name),
+						  context_inode);
 	if (error) {
 		iput(inode);
 		return ERR_PTR(error);
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 75c8a97a6954c..7b3b63ed747cf 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -405,7 +405,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
 		return ret;
 
 	struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound);
-	struct qstr name = (struct qstr) QSTR(name_buf);
+	struct qstr name = QSTR(name_buf);
 
 	inode->bi_dir = lostfound.bi_inum;
 
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 3c7f941dde39a..ebabba2968821 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -32,8 +32,6 @@
 #include <linux/sort.h>
 #include <linux/stat.h>
 
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-
 void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
 {
 	if (btree >= BTREE_ID_NR_MAX)
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index fb02c1c360044..a27f4b84fe775 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -647,8 +647,6 @@ static inline int cmp_le32(__le32 l, __le32 r)
 
 #include <linux/uuid.h>
 
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-
 static inline bool qstr_eq(const struct qstr l, const struct qstr r)
 {
 	return l.len == r.len && !memcmp(l.name, r.name, l.len);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index a90d7d6497390..60d2cf26e837e 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -407,7 +407,7 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
 	}
 
 	it.index = index;
-	it.name = (struct qstr)QSTR_INIT(name, strlen(name));
+	it.name = QSTR(name);
 	if (it.name.len > EROFS_NAME_LEN)
 		return -ERANGE;
 
diff --git a/fs/file_table.c b/fs/file_table.c
index 976736be47cb6..a329623d0b421 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -351,9 +351,7 @@ static struct file *alloc_file(const struct path *path, int flags,
 static inline int alloc_path_pseudo(const char *name, struct inode *inode,
 				    struct vfsmount *mnt, struct path *path)
 {
-	struct qstr this = QSTR_INIT(name, strlen(name));
-
-	path->dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+	path->dentry = d_alloc_pseudo(mnt->mnt_sb, &QSTR(name));
 	if (!path->dentry)
 		return -ENOMEM;
 	path->mnt = mntget(mnt);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8502ef68459b9..0eb320617d7b1 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -927,7 +927,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
 		if (!inode)
 			continue;
 
-		name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
+		name = QSTR(kn->name);
 		parent = kernfs_get_parent(kn);
 		if (parent) {
 			p_inode = ilookup(info->sb, kernfs_ino(parent));
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bff956f7b2b98..3d53a60145911 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -57,6 +57,7 @@ struct qstr {
 };
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
+#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n))
 
 extern const struct qstr empty_name;
 extern const struct qstr slash_name;
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 399552814fd0f..1b0a214ee5580 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -195,14 +195,13 @@ static struct file *secretmem_file_create(unsigned long flags)
 	struct file *file;
 	struct inode *inode;
 	const char *anon_name = "[secretmem]";
-	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
 	int err;
 
 	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 
-	err = security_inode_init_security_anon(inode, &qname, NULL);
+	err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL);
 	if (err) {
 		file = ERR_PTR(err);
 		goto err_free_inode;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7ce3721c06ca5..eadc00410ebc5 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -630,7 +630,7 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
 static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
 					  const char *name)
 {
-	struct qstr q = QSTR_INIT(name, strlen(name));
+	struct qstr q = QSTR(name);
 	struct dentry *dentry = d_hash_and_lookup(parent, &q);
 	if (!dentry) {
 		dentry = d_alloc(parent, &q);
@@ -1190,8 +1190,7 @@ static const struct rpc_filelist files[] = {
 struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
 			       const unsigned char *dir_name)
 {
-	struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name));
-	return d_hash_and_lookup(sb->s_root, &dir);
+	return d_hash_and_lookup(sb->s_root, &QSTR(dir_name));
 }
 EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
 
@@ -1300,11 +1299,9 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
 	struct dentry *gssd_dentry;
 	struct dentry *clnt_dentry = NULL;
 	struct dentry *pipe_dentry = NULL;
-	struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name,
-				  strlen(files[RPCAUTH_gssd].name));
 
 	/* We should never get this far if "gssd" doesn't exist */
-	gssd_dentry = d_hash_and_lookup(root, &q);
+	gssd_dentry = d_hash_and_lookup(root, &QSTR(files[RPCAUTH_gssd].name));
 	if (!gssd_dentry)
 		return ERR_PTR(-ENOENT);
 
@@ -1314,9 +1311,8 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
 		goto out;
 	}
 
-	q.name = gssd_dummy_clnt_dir[0].name;
-	q.len = strlen(gssd_dummy_clnt_dir[0].name);
-	clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
+	clnt_dentry = d_hash_and_lookup(gssd_dentry,
+					&QSTR(gssd_dummy_clnt_dir[0].name));
 	if (!clnt_dentry) {
 		__rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
 		pipe_dentry = ERR_PTR(-ENOENT);

From 5499b5ac0b2c661cc37190a23a4aee9308b3d3ee Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 27 Jan 2025 20:58:42 -0600
Subject: [PATCH 232/368] tools/power turbostat: Harden one-shot mode against
 cpu offline

When turbostat interval mode can't migrate to a CPU, it complains,
prints no data, re-initializes with the new CPU configuration
and starts a new interval.

But this strategy in the face of a CPU hotplug offline during an interval
doesn't help in one-shot mode.  When the missing CPU is discovered
at the end of the interval, the forked program has already returned
and there is nothing left for a new interval to measure.

So instead of aborting get_counters() and delta_cpu() if a missing CPU
is detected, complain, but carry on and output what statistics are
actually present.

Use the same strategy for delta_cpu when aperf:mperf are observed
to have been reset -- complain, but carry on and print data for
the CPUs that are still present.

Interval mode error handling is unchanged.

One-shot mode can now do this:

$ sudo chcpu -e 1 ; sudo ./turbostat --quiet --show PkgWatt,Busy%,CPU chcpu -d 1
CPU 1 enabled
CPU 1 disabled
get_counters: Could not migrate to CPU 1
./turbostat: Counter reset detected
0.036920 sec
CPU	Busy%	PkgWatt
-	0.00	10.00
0	99.73	10.00
1	0.00
2	91.53
3	16.83

Suggested-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 1f188a0908da6..8df08819e7b41 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2063,6 +2063,8 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
 {
 	int retval, pkg_no, core_no, thread_no, node_no;
 
+	retval = 0;
+
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -2078,14 +2080,12 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
 					c = GET_CORE(core_base, core_no, node_no, pkg_no);
 					p = GET_PKG(pkg_base, pkg_no);
 
-					retval = func(t, c, p);
-					if (retval)
-						return retval;
+					retval |= func(t, c, p);
 				}
 			}
 		}
 	}
-	return 0;
+	return retval;
 }
 
 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -3620,12 +3620,10 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
 
 	/* always calculate thread delta */
 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
-	if (retval)
-		return retval;
 
 	/* calculate package delta only for 1st core in package */
 	if (is_cpu_first_core_in_package(t, c, p))
-		retval = delta_package(p, p2);
+		retval |= delta_package(p, p2);
 
 	return retval;
 }
@@ -5748,6 +5746,8 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 {
 	int retval, pkg_no, node_no, core_no, thread_no;
 
+	retval = 0;
+
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -5769,14 +5769,12 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 					p = GET_PKG(pkg_base, pkg_no);
 					p2 = GET_PKG(pkg_base2, pkg_no);
 
-					retval = func(t, c, p, t2, c2, p2);
-					if (retval)
-						return retval;
+					retval |= func(t, c, p, t2, c2, p2);
 				}
 			}
 		}
 	}
-	return 0;
+	return retval;
 }
 
 /*
@@ -9462,10 +9460,9 @@ int fork_it(char **argv)
 	timersub(&tv_odd, &tv_even, &tv_delta);
 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
 		fprintf(outf, "%s: Counter reset detected\n", progname);
-	else {
-		compute_average(EVEN_COUNTERS);
-		format_all_counters(EVEN_COUNTERS);
-	}
+
+	compute_average(EVEN_COUNTERS);
+	format_all_counters(EVEN_COUNTERS);
 
 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
 

From 519b2b14bef70922bd64117a978ea7f2a683b75b Mon Sep 17 00:00:00 2001
From: Chenyuan Yang <chenyuan0y@gmail.com>
Date: Mon, 27 Jan 2025 10:06:55 -0600
Subject: [PATCH 233/368] ALSA: pcm: use new array-copying-wrapper

This is found by our static analysis tool.

pcm_native.c utilizes memdup_user() to copy an array from userspace.

There is a new wrapper, specifically designed for copying arrays. Use
this one instead.

This is similar to the
commit 3e91a38de1dc ("fbdev: viafb: use new array-copying-wrapper").

Signed-off-by: Chenyuan Yang <chenyuan0y@gmail.com>
Link: https://patch.msgid.link/20250127160655.3119470-1-cy1yang@outlook.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/pcm_native.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 8a3384342e8db..6c2b6a62d9d2f 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3245,7 +3245,7 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream,
 	if (copy_from_user(&xfern, _xfern, sizeof(xfern)))
 		return -EFAULT;
 
-	bufs = memdup_user(xfern.bufs, sizeof(void *) * runtime->channels);
+	bufs = memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *));
 	if (IS_ERR(bufs))
 		return PTR_ERR(bufs);
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)

From a9ab28b3d21aec6d0f56fe722953e20ce470237b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jan 2025 06:22:58 +0100
Subject: [PATCH 234/368] xfs: remove xfs_buf_cache.bc_lock

xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
the hashtable.  But as the rhashtable code already uses fine grained
internal locking for inserts and removals the extra protection isn't
actually required.

It also happens to fix a lock order inversion vs b_lock added by the
recent lookup race fix.

Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
Reported-by: Lai, Yi <yi1.lai@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_buf.c | 31 +++++++++++++++++--------------
 fs/xfs/xfs_buf.h |  1 -
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f1252ed8bd0a7..ef207784876c8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
  *
  * xfs_buf_rele:
  *	b_lock
- *	  pag_buf_lock
- *	    lru_lock
+ *	  lru_lock
  *
  * xfs_buftarg_drain_rele
  *	lru_lock
@@ -220,14 +219,21 @@ _xfs_buf_alloc(
 	 */
 	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
 
-	spin_lock_init(&bp->b_lock);
+	/*
+	 * A new buffer is held and locked by the owner.  This ensures that the
+	 * buffer is owned by the caller and racing RCU lookups right after
+	 * inserting into the hash table are safe (and will have to wait for
+	 * the unlock to do anything non-trivial).
+	 */
 	bp->b_hold = 1;
+	sema_init(&bp->b_sema, 0); /* held, no waiters */
+
+	spin_lock_init(&bp->b_lock);
 	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	INIT_LIST_HEAD(&bp->b_li_list);
-	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	bp->b_target = target;
 	bp->b_mount = target->bt_mount;
 	bp->b_flags = flags;
@@ -497,7 +503,6 @@ int
 xfs_buf_cache_init(
 	struct xfs_buf_cache	*bch)
 {
-	spin_lock_init(&bch->bc_lock);
 	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
 }
 
@@ -647,17 +652,20 @@ xfs_buf_find_insert(
 	if (error)
 		goto out_free_buf;
 
-	spin_lock(&bch->bc_lock);
+	/* The new buffer keeps the perag reference until it is freed. */
+	new_bp->b_pag = pag;
+
+	rcu_read_lock();
 	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
 	if (IS_ERR(bp)) {
+		rcu_read_unlock();
 		error = PTR_ERR(bp);
-		spin_unlock(&bch->bc_lock);
 		goto out_free_buf;
 	}
 	if (bp && xfs_buf_try_hold(bp)) {
 		/* found an existing buffer */
-		spin_unlock(&bch->bc_lock);
+		rcu_read_unlock();
 		error = xfs_buf_find_lock(bp, flags);
 		if (error)
 			xfs_buf_rele(bp);
@@ -665,10 +673,8 @@ xfs_buf_find_insert(
 			*bpp = bp;
 		goto out_free_buf;
 	}
+	rcu_read_unlock();
 
-	/* The new buffer keeps the perag reference until it is freed. */
-	new_bp->b_pag = pag;
-	spin_unlock(&bch->bc_lock);
 	*bpp = new_bp;
 	return 0;
 
@@ -1085,7 +1091,6 @@ xfs_buf_rele_cached(
 	}
 
 	/* we are asked to drop the last reference */
-	spin_lock(&bch->bc_lock);
 	__xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
 		/*
@@ -1097,7 +1102,6 @@ xfs_buf_rele_cached(
 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
 		else
 			bp->b_hold--;
-		spin_unlock(&bch->bc_lock);
 	} else {
 		bp->b_hold--;
 		/*
@@ -1115,7 +1119,6 @@ xfs_buf_rele_cached(
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
 				xfs_buf_hash_params);
-		spin_unlock(&bch->bc_lock);
 		if (pag)
 			xfs_perag_put(pag);
 		freebuf = true;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7e73663c5d4a5..3b4ed42e11c01 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
 #define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
 struct xfs_buf_cache {
-	spinlock_t		bc_lock;
 	struct rhashtable	bc_hash;
 };
 

From 6e74e53b34b6dec5a50e1404e2680852ec6768d2 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Mon, 27 Jan 2025 16:43:04 +0100
Subject: [PATCH 235/368] ata: libata-sff: Ensure that we cannot write outside
 the allocated buffer

reveliofuzzing reported that a SCSI_IOCTL_SEND_COMMAND ioctl with out_len
set to 0xd42, SCSI command set to ATA_16 PASS-THROUGH, ATA command set to
ATA_NOP, and protocol set to ATA_PROT_PIO, can cause ata_pio_sector() to
write outside the allocated buffer, overwriting random memory.

While an ATA device is supposed to abort an ATA_NOP command, there does seem
to be a bug either in libata-sff or QEMU, where either this status is not
set, or the status is cleared before read by ata_sff_hsm_move().
Anyway, that is most likely a separate bug.

Looking at __atapi_pio_bytes(), it already has a safety check to ensure
that __atapi_pio_bytes() cannot write outside the allocated buffer.

Add a similar check to ata_pio_sector(), such that also ata_pio_sector()
cannot write outside the allocated buffer.

Cc: stable@vger.kernel.org
Reported-by: reveliofuzzing <reveliofuzzing@gmail.com>
Closes: https://lore.kernel.org/linux-ide/CA+-ZZ_jTgxh3bS7m+KX07_EWckSnW3N2adX3KV63y4g7M4CZ2A@mail.gmail.com/
Link: https://lore.kernel.org/r/20250127154303.15567-2-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/libata-sff.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 67f277e1c3bf3..5a46c066abc36 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -601,7 +601,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct page *page;
-	unsigned int offset;
+	unsigned int offset, count;
 
 	if (!qc->cursg) {
 		qc->curbytes = qc->nbytes;
@@ -617,25 +617,27 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 	page = nth_page(page, (offset >> PAGE_SHIFT));
 	offset %= PAGE_SIZE;
 
-	trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size);
+	/* don't overrun current sg */
+	count = min(qc->cursg->length - qc->cursg_ofs, qc->sect_size);
+
+	trace_ata_sff_pio_transfer_data(qc, offset, count);
 
 	/*
 	 * Split the transfer when it splits a page boundary.  Note that the
 	 * split still has to be dword aligned like all ATA data transfers.
 	 */
 	WARN_ON_ONCE(offset % 4);
-	if (offset + qc->sect_size > PAGE_SIZE) {
+	if (offset + count > PAGE_SIZE) {
 		unsigned int split_len = PAGE_SIZE - offset;
 
 		ata_pio_xfer(qc, page, offset, split_len);
-		ata_pio_xfer(qc, nth_page(page, 1), 0,
-			     qc->sect_size - split_len);
+		ata_pio_xfer(qc, nth_page(page, 1), 0, count - split_len);
 	} else {
-		ata_pio_xfer(qc, page, offset, qc->sect_size);
+		ata_pio_xfer(qc, page, offset, count);
 	}
 
-	qc->curbytes += qc->sect_size;
-	qc->cursg_ofs += qc->sect_size;
+	qc->curbytes += count;
+	qc->cursg_ofs += count;
 
 	if (qc->cursg_ofs == qc->cursg->length) {
 		qc->cursg = sg_next(qc->cursg);

From d63b0e8a628e62ca85a0f7915230186bb92f8bb4 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 00:55:24 +0000
Subject: [PATCH 236/368] io_uring: fix multishots with selected buffers

We do io_kbuf_recycle() when arming a poll but every iteration of a
multishot can grab more buffers, which is why we need to flush the kbuf
ring state before continuing with waiting.

Cc: stable@vger.kernel.org
Fixes: b3fdea6ecb55c ("io_uring: multishot recv")
Reported-by: Muhammad Ramdhan <ramdhan@starlabs.sg>
Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
Reported-by: Jacob Soo <jacob.soo@starlabs.sg>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1bfc9990fe435f1fc6152ca9efeba5eb3e68339c.1738025570.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/poll.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/io_uring/poll.c b/io_uring/poll.c
index 356474c66f324..31b118133bb04 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -315,8 +315,10 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 
 	ret = io_poll_check_events(req, ts);
 	if (ret == IOU_POLL_NO_ACTION) {
+		io_kbuf_recycle(req, 0);
 		return;
 	} else if (ret == IOU_POLL_REQUEUE) {
+		io_kbuf_recycle(req, 0);
 		__io_poll_execute(req, 0);
 		return;
 	}

From 5ce1e9bbb2a1d43cf9e613cb03e65ecdfd309fe9 Mon Sep 17 00:00:00 2001
From: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Date: Fri, 17 Jan 2025 13:50:29 +0100
Subject: [PATCH 237/368] tools/power turbostat: Add CPU%c1e BIC for CWF

Intel Clearwater Forest reports PMT telemetry with GUID 0x14421519, which
can be used to obtain the module c1e residency counter of type tcore clock.

Add early support for the counter by using a heuristic that should work
for the Clearwater Forest platforms.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 68 +++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8df08819e7b41..364a44a7d7aee 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -205,6 +205,7 @@ struct msr_counter bic[] = {
 	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
 	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
 	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
+	{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -270,6 +271,7 @@ struct msr_counter bic[] = {
 #define	BIC_SysWatt		(1ULL << 59)
 #define	BIC_Sys_J		(1ULL << 60)
 #define	BIC_NMI			(1ULL << 61)
+#define	BIC_CPU_c1e		(1ULL << 62)
 
 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
 #define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
@@ -1538,6 +1540,14 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = {
 #define PMT_MTL_DC6_GUID           0x1a067102
 #define PMT_MTL_DC6_SEQ            0
 
+#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE          20936
+#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT     24
+#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
+#define PMT_COUNTER_CWF_CPUS_PER_MODULE           4
+#define PMT_COUNTER_CWF_MC1E_LSB                  0
+#define PMT_COUNTER_CWF_MC1E_MSB                  63
+#define PMT_CWF_MC1E_GUID                         0x14421519
+
 unsigned long long tcore_clock_freq_hz = 800000000;
 
 #define PMT_COUNTER_NAME_SIZE_BYTES      16
@@ -9367,11 +9377,69 @@ int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum
 
 void pmt_init(void)
 {
+	int cpu_num;
+	unsigned long seq, offset, mod_num;
+
 	if (BIC_IS_ENABLED(BIC_Diec6)) {
 		pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
 				PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
 				SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
 	}
+
+	if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
+		seq = 0;
+		offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+		mod_num = 0;	/* Relative module number for current PMT file. */
+
+		/* Open the counter for each CPU. */
+		for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {
+
+			if (cpu_is_not_allowed(cpu_num))
+				goto next_loop_iter;
+
+			/*
+			 * Set the scope to CPU, even though CWF report the counter per module.
+			 * CPUs inside the same module will read from the same location, instead of reporting zeros.
+			 *
+			 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME instead of PMT_TYPE_TCORE_CLOCK.
+			 */
+			pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
+					PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
+					FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
+
+			/*
+			 * Rather complex logic for each time we go to the next loop iteration,
+			 * so keep it as a label.
+			 */
+next_loop_iter:
+			/*
+			 * Advance the cpu number and check if we should also advance offset to
+			 * the next counter inside the PMT file.
+			 *
+			 * On Clearwater Forest platform, the counter is reported per module,
+			 * so open the same counter for all of the CPUs inside the module.
+			 * That way, reported table show the correct value for all of the CPUs inside the module,
+			 * instead of zeros.
+			 */
+			++cpu_num;
+			if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
+				offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
+				++mod_num;
+			}
+
+			/*
+			 * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
+			 *
+			 * If that number is reached, seq must be incremented to advance to the next file in a sequence.
+			 * Offset inside that file and a module counter has to be reset.
+			 */
+			if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
+				++seq;
+				offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+				mod_num = 0;
+			}
+		}
+	}
 }
 
 void turbostat_init()

From 3b4309546b48fc167aa615a2d881a09c0a97971f Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Wed, 29 Jan 2025 00:54:15 +0800
Subject: [PATCH 238/368] ALSA: hda: Fix headset detection failure due to
 unstable sort

The auto_parser assumed sort() was stable, but the kernel's sort() uses
heapsort, which has never been stable. After commit 0e02ca29a563
("lib/sort: optimize heapsort with double-pop variation"), the order of
equal elements changed, causing the headset to fail to work.

Fix the issue by recording the original order of elements before
sorting and using it as a tiebreaker for equal elements in the
comparison function.

Fixes: b9030a005d58 ("ALSA: hda - Use standard sort function in hda_auto_parser.c")
Reported-by: Austrum <austrum.lab@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219158
Tested-by: Austrum <austrum.lab@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Link: https://patch.msgid.link/20250128165415.643223-1-visitorckw@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_auto_parser.c | 8 +++++++-
 sound/pci/hda/hda_auto_parser.h | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c
index 84393f4f429df..8923813ce4247 100644
--- a/sound/pci/hda/hda_auto_parser.c
+++ b/sound/pci/hda/hda_auto_parser.c
@@ -80,7 +80,11 @@ static int compare_input_type(const void *ap, const void *bp)
 
 	/* In case one has boost and the other one has not,
 	   pick the one with boost first. */
-	return (int)(b->has_boost_on_pin - a->has_boost_on_pin);
+	if (a->has_boost_on_pin != b->has_boost_on_pin)
+		return (int)(b->has_boost_on_pin - a->has_boost_on_pin);
+
+	/* Keep the original order */
+	return a->order - b->order;
 }
 
 /* Reorder the surround channels
@@ -400,6 +404,8 @@ int snd_hda_parse_pin_defcfg(struct hda_codec *codec,
 	reorder_outputs(cfg->speaker_outs, cfg->speaker_pins);
 
 	/* sort inputs in the order of AUTO_PIN_* type */
+	for (i = 0; i < cfg->num_inputs; i++)
+		cfg->inputs[i].order = i;
 	sort(cfg->inputs, cfg->num_inputs, sizeof(cfg->inputs[0]),
 	     compare_input_type, NULL);
 
diff --git a/sound/pci/hda/hda_auto_parser.h b/sound/pci/hda/hda_auto_parser.h
index 579b11beac718..87af3d8c02f7f 100644
--- a/sound/pci/hda/hda_auto_parser.h
+++ b/sound/pci/hda/hda_auto_parser.h
@@ -37,6 +37,7 @@ struct auto_pin_cfg_item {
 	unsigned int is_headset_mic:1;
 	unsigned int is_headphone_mic:1; /* Mic-only in headphone jack */
 	unsigned int has_boost_on_pin:1;
+	int order;
 };
 
 struct auto_pin_cfg;

From c7b87ce0dd10b64b68a0b22cb83bbd556e28fe81 Mon Sep 17 00:00:00 2001
From: Howard Chu <howardchu95@gmail.com>
Date: Tue, 21 Jan 2025 18:55:19 -0800
Subject: [PATCH 239/368] perf trace: Fix runtime error of index out of bounds

libtraceevent parses and returns an array of argument fields, sometimes
larger than RAW_SYSCALL_ARGS_NUM (6) because it includes "__syscall_nr",
idx will traverse to index 6 (7th element) whereas sc->fmt->arg holds 6
elements max, creating an out-of-bounds access. This runtime error is
found by UBsan. The error message:

  $ sudo UBSAN_OPTIONS=print_stacktrace=1 ./perf trace -a --max-events=1
  builtin-trace.c:1966:35: runtime error: index 6 out of bounds for type 'syscall_arg_fmt [6]'
    #0 0x5c04956be5fe in syscall__alloc_arg_fmts /home/howard/hw/linux-perf/tools/perf/builtin-trace.c:1966
    #1 0x5c04956c0510 in trace__read_syscall_info /home/howard/hw/linux-perf/tools/perf/builtin-trace.c:2110
    #2 0x5c04956c372b in trace__syscall_info /home/howard/hw/linux-perf/tools/perf/builtin-trace.c:2436
    #3 0x5c04956d2f39 in trace__init_syscalls_bpf_prog_array_maps /home/howard/hw/linux-perf/tools/perf/builtin-trace.c:3897
    #4 0x5c04956d6d25 in trace__run /home/howard/hw/linux-perf/tools/perf/builtin-trace.c:4335
    #5 0x5c04956e112e in cmd_trace /home/howard/hw/linux-perf/tools/perf/builtin-trace.c:5502
    #6 0x5c04956eda7d in run_builtin /home/howard/hw/linux-perf/tools/perf/perf.c:351
    #7 0x5c04956ee0a8 in handle_internal_command /home/howard/hw/linux-perf/tools/perf/perf.c:404
    #8 0x5c04956ee37f in run_argv /home/howard/hw/linux-perf/tools/perf/perf.c:448
    #9 0x5c04956ee8e9 in main /home/howard/hw/linux-perf/tools/perf/perf.c:556
    #10 0x79eb3622a3b7 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
    #11 0x79eb3622a47a in __libc_start_main_impl ../csu/libc-start.c:360
    #12 0x5c04955422d4 in _start (/home/howard/hw/linux-perf/tools/perf/perf+0x4e02d4) (BuildId: 5b6cab2d59e96a4341741765ad6914a4d784dbc6)

     0.000 ( 0.014 ms): Chrome_ChildIO/117244 write(fd: 238, buf: !, count: 1)                                      = 1

Fixes: 5e58fcfaf4c6 ("perf trace: Allow allocating sc->arg_fmt even without the syscall tracepoint")
Signed-off-by: Howard Chu <howardchu95@gmail.com>
Link: https://lore.kernel.org/r/20250122025519.361873-1-howardchu95@gmail.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-trace.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d7c7d29291fbf..d466447ae928a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 		return PTR_ERR(sc->tp_format);
 	}
 
+	/*
+	 * The tracepoint format contains __syscall_nr field, so it's one more
+	 * than the actual number of syscall arguments.
+	 */
 	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
-					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
+					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
 		return -ENOMEM;
 
 	sc->args = sc->tp_format->format.fields;

From 72d81e10628be6a948463259cbb6d3b670b20054 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 28 Jan 2025 09:06:29 -0800
Subject: [PATCH 240/368] perf test: Skip syscall enum test if no landlock
 syscall

The perf trace enum augmentation test specifically targets the
landlock_add_rule syscall, but IIUC it's optional and can be opted out
of by a kernel config.

Currently trace_landlock() runs `perf test -w landlock` before the
actual testing to check the availability but it's not enough since the
workload always returns 0.  Instead it could check if perf trace output
has 'landlock' string.

Fixes: d66763fed30f0bd8c ("perf test trace_btf_enum: Add regression test for the BTF augmentation of enums in 'perf trace'")
Reviewed-by: Howard Chu <howardchu95@gmail.com>
Link: https://lore.kernel.org/r/20250128170629.1251574-1-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/tests/shell/trace_btf_enum.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
index 5a3b8a5a9b5cf..8d1e6bbeac906 100755
--- a/tools/perf/tests/shell/trace_btf_enum.sh
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -26,8 +26,12 @@ check_vmlinux() {
 trace_landlock() {
   echo "Tracing syscall ${syscall}"
 
-  # test flight just to see if landlock_add_rule and libbpf are available
-  $TESTPROG
+  # test flight just to see if landlock_add_rule is available
+  if ! perf trace $TESTPROG 2>&1 | grep -q landlock
+  then
+    echo "No landlock system call found, skipping to non-syscall tracing."
+    return
+  fi
 
   if perf trace -e $syscall $TESTPROG 2>&1 | \
      grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*"

From 9fae5884bb0e3480dbb69314b82ed3d8f8482eef Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@linaro.org>
Date: Wed, 18 Dec 2024 11:55:51 +0000
Subject: [PATCH 241/368] perf cpumap: Fix die and cluster IDs

Now that filename__read_int() returns -errno instead of -1, these
statements need to be updated; otherwise error values will be used as
die IDs.

This appears as a -2 die ID when the platform doesn't export one:

  $ perf stat --per-core -a -- true

  S36-D-2-C0            1               9.45 msec cpu-clock

And the session topology test fails:

  $ perf test -vvv topology

  CPU 0, core 0, socket 36
  CPU 1, core 1, socket 36
  CPU 2, core 2, socket 36
  CPU 3, core 3, socket 36
  FAILED tests/topology.c:137 Cpu map - Die ID doesn't match
  ---- end(-1) ----
  38: Session topology                                                : FAILED!

Fixes: 05be17eed774 ("tool api fs: Correctly encode errno for read/write open failures")
Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241218115552.912517-1-james.clark@linaro.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/cpumap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8a..5c329ad614e9b 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
 
 	die = cpu__get_die_id(cpu);
 	/* There is no die_id on legacy system. */
-	if (die == -1)
+	if (die < 0)
 		die = 0;
 
 	/*
@@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
 	struct aggr_cpu_id id;
 
 	/* There is no cluster_id on legacy system. */
-	if (cluster == -1)
+	if (cluster < 0)
 		cluster = 0;
 
 	id = aggr_cpu_id__die(cpu, data);

From f214b7beb00621b983e67ce97477afc3ab4b38f4 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <jay.cornwall@amd.com>
Date: Thu, 16 Jan 2025 14:36:39 -0600
Subject: [PATCH 242/368] drm/amdkfd: Block per-queue reset when
 halt_if_hws_hang=1

The purpose of halt_if_hws_hang is to preserve GPU state for driver
debugging when queue preemption fails. Issuing per-queue reset may
kill wavefronts which caused the preemption failure.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Jonathan Kim <Jonathan.Kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 6.12.x
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1405e8affd484..d4593374e7a1e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2325,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	 */
 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
 	if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) {
+		while (halt_if_hws_hang)
+			schedule();
 		if (reset_queues_on_hws_hang(dqm)) {
-			while (halt_if_hws_hang)
-				schedule();
 			dqm->is_hws_hang = true;
 			kfd_hws_hang(dqm);
 			retval = -ETIME;

From 5cda56bd86c455341087dca29c65dc7c87f84340 Mon Sep 17 00:00:00 2001
From: Kenneth Feng <kenneth.feng@amd.com>
Date: Mon, 20 Jan 2025 15:33:03 +0800
Subject: [PATCH 243/368] drm/amd/amdgpu: change the config of cgcg on gfx12

change the config of cgcg on gfx12

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 6.12.x
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index c1772f44b1d74..2523221a2519d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4021,17 +4021,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
 
 		if (def != data)
 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
-
-		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
-		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
-		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
-
-		/* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
-		if (adev->sdma.num_instances > 1) {
-			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
-			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
-			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
-		}
 	}
 }
 

From 819bf6662b93a5a8b0c396d2c7e7fab6264c9808 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Wed, 22 Jan 2025 09:12:41 +0530
Subject: [PATCH 244/368] drm/amd/pm: Mark MM activity as unsupported

Aldebaran doesn't support querying MM activity percentage. Keep the
field as 0xFFs to mark it as unsupported.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index f6b0293543275..83163d7c7f001 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1732,7 +1732,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 
 	gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
 	gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
-	gpu_metrics->average_mm_activity = 0;
 
 	/* Valid power data is available only from primary die */
 	if (aldebaran_is_primary(smu)) {

From f88192d2335b5a911fcfa09338cc00624571ec5e Mon Sep 17 00:00:00 2001
From: loanchen <lo-an.chen@amd.com>
Date: Wed, 15 Jan 2025 17:43:29 +0800
Subject: [PATCH 245/368] drm/amd/display: Correct register address in dcn35

[Why]
The offset address of mmCLK5_spll_field_8 was incorrect for dcn35,
which caused SSC not to be enabled.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Lo-An Chen <lo-an.chen@amd.com>
Signed-off-by: Zaeem Mohamed <zaeem.mohamed@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 1f974ea3b0c65..1648226586e22 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -89,7 +89,7 @@
 #define mmCLK1_CLK4_ALLOW_DS 0x16EA8
 #define mmCLK1_CLK5_ALLOW_DS 0x16EB1
 
-#define mmCLK5_spll_field_8 0x1B04B
+#define mmCLK5_spll_field_8 0x1B24B
 #define mmDENTIST_DISPCLK_CNTL 0x0124
 #define regDENTIST_DISPCLK_CNTL 0x0064
 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1

From 9078a5bfa21e78ae68b6d7c365d1b92f26720c55 Mon Sep 17 00:00:00 2001
From: Prike Liang <Prike.Liang@amd.com>
Date: Tue, 14 Jan 2025 11:20:17 +0800
Subject: [PATCH 246/368] drm/amdkfd: only flush the validate MES contex

The following page fault was observed during the KFD process release.
In this particular error case, the HIP test (./MemcpyPerformance -h)
does not require the queue. As a result, the process_context_addr was
not assigned when the KFD process was released, ultimately leading to
this page fault during the execution of the function
kfd_process_dequeue_from_all_devices().

[345962.294891] amdgpu 0000:03:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:153 vmid:0 pasid:0)
[345962.295333] amdgpu 0000:03:00.0: amdgpu:   in page starting at address 0x0000000000000000 from client 10
[345962.295775] amdgpu 0000:03:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00000B33
[345962.296097] amdgpu 0000:03:00.0: amdgpu:     Faulty UTCL2 client ID: CPC (0x5)
[345962.296394] amdgpu 0000:03:00.0: amdgpu:     MORE_FAULTS: 0x1
[345962.296633] amdgpu 0000:03:00.0: amdgpu:     WALKER_ERROR: 0x1
[345962.296876] amdgpu 0000:03:00.0: amdgpu:     PERMISSION_FAULTS: 0x3
[345962.297135] amdgpu 0000:03:00.0: amdgpu:     MAPPING_ERROR: 0x1
[345962.297377] amdgpu 0000:03:00.0: amdgpu:     RW: 0x0
[345962.297682] amdgpu 0000:03:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:169 vmid:0 pasid:0)

Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 9df56f8e09f91..bcddd989c7f39 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 
 	if (pdd->already_dequeued)
 		return;
-
+	/* The MES context flush needs to filter out the case which the
+	 * KFD process is created without setting up the MES context and
+	 * queue for creating a compute queue.
+	 */
 	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
-	if (dev->kfd->shared_resources.enable_mes &&
+	if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
 	    down_read_trylock(&dev->adev->reset_domain->sem)) {
 		amdgpu_mes_flush_shader_debugger(dev->adev,
 						 pdd->proc_ctx_gpu_addr);

From 7f2b5237e313e39008a85b33ca94ab503a8fdff9 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Mon, 27 Jan 2025 21:41:10 -0300
Subject: [PATCH 247/368] drm/amd/display: restore invalid MSA timing check for
 freesync

This restores the original behavior that gets min/max freq from EDID and
only sets the DP/eDP connector as freesync capable if "sink device is
capable of rendering incoming video stream without MSA timing
parameters", i.e., `allow_invalid_MSA_timing_param` is true. The
condition was mistakenly removed by 0159f88a99c9 ("drm/amd/display:
remove redundant freesync parser for DP").

CC: Mario Limonciello <mario.limonciello@amd.com>
CC: Alex Hung <alex.hung@amd.com>
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3915
Fixes: 0159f88a99c9 ("drm/amd/display: remove redundant freesync parser for DP")
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b0e66c05d8111..ac3fd81fecef2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -12326,10 +12326,14 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 
 	if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
 		     sink->sink_signal == SIGNAL_TYPE_EDP)) {
-		amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
-		amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
-		if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
-			freesync_capable = true;
+		if (amdgpu_dm_connector->dc_link &&
+		    amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) {
+			amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
+			amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+			if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
+				freesync_capable = true;
+		}
+
 		parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
 
 		if (vsdb_info.replay_mode) {

From 299276502d41cd86376f47b7e087d017eaa0f914 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:09 +0000
Subject: [PATCH 248/368] io_uring: include all deps for alloc_cache.h

alloc_cache.h uses types it doesn't declare and thus depends on the
order in which it's included. Make it self contained and pull all needed
definitions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/39569f3d5b250b4fe78bb609d57f67d3736ebcc4.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/alloc_cache.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index cca96aff3277e..28436f413bd2c 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -1,6 +1,8 @@
 #ifndef IOU_ALLOC_CACHE_H
 #define IOU_ALLOC_CACHE_H
 
+#include <linux/io_uring_types.h>
+
 /*
  * Don't allow the cache to grow beyond this size.
  */

From 16ac51a0a7aa051fd3b82fa077597488b5572d41 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:10 +0000
Subject: [PATCH 249/368] io_uring: dont ifdef io_alloc_cache_kasan()

Use IS_ENABLED in io_alloc_cache_kasan() so at least it gets compile
tested without KASAN.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/35e53e83f6e16478dca0028a64a6cc905dc764d3.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/alloc_cache.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 28436f413bd2c..9eb374ad7490c 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -8,18 +8,14 @@
  */
 #define IO_ALLOC_CACHE_MAX	128
 
-#if defined(CONFIG_KASAN)
-static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
-{
-	kfree(*iov);
-	*iov = NULL;
-	*nr = 0;
-}
-#else
 static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
 {
+	if (IS_ENABLED(CONFIG_KASAN)) {
+		kfree(*iov);
+		*iov = NULL;
+		*nr = 0;
+	}
 }
-#endif
 
 static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
 				      void *entry)

From d19af0e9366298aa60afc0fb51ffcbd6205edcee Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:11 +0000
Subject: [PATCH 250/368] io_uring: add alloc_cache.c

Avoid inlining all and everything from alloc_cache.h and move cold bits
into a new file.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/06984c6cd58e703f7cfae5ab3067912f9f635a06.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/Makefile      |  2 +-
 io_uring/alloc_cache.c | 44 ++++++++++++++++++++++++++++++++++++++++++
 io_uring/alloc_cache.h | 44 +++++++++---------------------------------
 3 files changed, 54 insertions(+), 36 deletions(-)
 create mode 100644 io_uring/alloc_cache.c

diff --git a/io_uring/Makefile b/io_uring/Makefile
index 53167bef37d77..d695b60dba4f0 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o opdef.o kbuf.o rsrc.o notif.o \
 					sync.o msg_ring.o advise.o openclose.o \
 					epoll.o statx.o timeout.o fdinfo.o \
 					cancel.o waitid.o register.o \
-					truncate.o memmap.o
+					truncate.o memmap.o alloc_cache.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FUTEX)		+= futex.o
 obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
diff --git a/io_uring/alloc_cache.c b/io_uring/alloc_cache.c
new file mode 100644
index 0000000000000..58423888b736e
--- /dev/null
+++ b/io_uring/alloc_cache.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "alloc_cache.h"
+
+void io_alloc_cache_free(struct io_alloc_cache *cache,
+			 void (*free)(const void *))
+{
+	void *entry;
+
+	if (!cache->entries)
+		return;
+
+	while ((entry = io_alloc_cache_get(cache)) != NULL)
+		free(entry);
+
+	kvfree(cache->entries);
+	cache->entries = NULL;
+}
+
+/* returns false if the cache was initialized properly */
+bool io_alloc_cache_init(struct io_alloc_cache *cache,
+			 unsigned max_nr, unsigned int size,
+			 unsigned int init_bytes)
+{
+	cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
+	if (!cache->entries)
+		return true;
+
+	cache->nr_cached = 0;
+	cache->max_cached = max_nr;
+	cache->elem_size = size;
+	cache->init_clear = init_bytes;
+	return false;
+}
+
+void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp)
+{
+	void *obj;
+
+	obj = kmalloc(cache->elem_size, gfp);
+	if (obj && cache->init_clear)
+		memset(obj, 0, cache->init_clear);
+	return obj;
+}
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 9eb374ad7490c..0dd17d8ba93a8 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -8,6 +8,14 @@
  */
 #define IO_ALLOC_CACHE_MAX	128
 
+void io_alloc_cache_free(struct io_alloc_cache *cache,
+			 void (*free)(const void *));
+bool io_alloc_cache_init(struct io_alloc_cache *cache,
+			 unsigned max_nr, unsigned int size,
+			 unsigned int init_bytes);
+
+void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp);
+
 static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
 {
 	if (IS_ENABLED(CONFIG_KASAN)) {
@@ -57,41 +65,7 @@ static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
 	obj = io_alloc_cache_get(cache);
 	if (obj)
 		return obj;
-
-	obj = kmalloc(cache->elem_size, gfp);
-	if (obj && cache->init_clear)
-		memset(obj, 0, cache->init_clear);
-	return obj;
-}
-
-/* returns false if the cache was initialized properly */
-static inline bool io_alloc_cache_init(struct io_alloc_cache *cache,
-				       unsigned max_nr, unsigned int size,
-				       unsigned int init_bytes)
-{
-	cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
-	if (cache->entries) {
-		cache->nr_cached = 0;
-		cache->max_cached = max_nr;
-		cache->elem_size = size;
-		cache->init_clear = init_bytes;
-		return false;
-	}
-	return true;
+	return io_cache_alloc_new(cache, gfp);
 }
 
-static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
-				       void (*free)(const void *))
-{
-	void *entry;
-
-	if (!cache->entries)
-		return;
-
-	while ((entry = io_alloc_cache_get(cache)) != NULL)
-		free(entry);
-
-	kvfree(cache->entries);
-	cache->entries = NULL;
-}
 #endif

From fefcb0dcd02fd34f808e91b13ce25f9847e52eb9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:12 +0000
Subject: [PATCH 251/368] io_uring/net: make io_net_vec_assign() return void

io_net_vec_assign() can only return 0 and it doesn't make sense for it
to fail, so make it return void.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/7c1a2390c99e17d3ae4e8562063e572d3cdeb164.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 41eef286f8b9a..e72205802055f 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -168,7 +168,7 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
 }
 
 /* assign new iovec to kmsg, if we need to */
-static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
 			     struct iovec *iov)
 {
 	if (iov) {
@@ -178,7 +178,6 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
 			kfree(kmsg->free_iov);
 		kmsg->free_iov = iov;
 	}
-	return 0;
 }
 
 static inline void io_mshot_prep_retry(struct io_kiocb *req,
@@ -240,7 +239,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 	if (unlikely(ret < 0))
 		return ret;
 
-	return io_net_vec_assign(req, iomsg, iov);
+	io_net_vec_assign(req, iomsg, iov);
+	return 0;
 }
 #endif
 
@@ -299,7 +299,8 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 	if (unlikely(ret < 0))
 		return ret;
 
-	return io_net_vec_assign(req, iomsg, iov);
+	io_net_vec_assign(req, iomsg, iov);
+	return 0;
 }
 
 static int io_sendmsg_copy_hdr(struct io_kiocb *req,

From 2b350f756b7acf84afab31d65ce6e3d496213ae5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:13 +0000
Subject: [PATCH 252/368] io_uring/net: clean io_msg_copy_hdr()

Put msg->msg_iov into a local variable in io_msg_copy_hdr(), it reads
better and clearly shows the used types.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/6a5d4f7a96b10e571d6128be010166b3aaf7afd5.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index e72205802055f..dedf274fc049a 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -280,11 +280,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 			ret = -EINVAL;
 			goto ua_end;
 		} else {
+			struct iovec __user *uiov = msg->msg_iov;
+
 			/* we only need the length for provided buffers */
-			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
+			if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len)))
 				goto ua_end;
-			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
-					ua_end);
+			unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end);
 			sr->len = iov->iov_len;
 		}
 		ret = 0;

From 86e62354eef16993834be5bd218d38ec96c47f16 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:14 +0000
Subject: [PATCH 253/368] io_uring/net: extract io_send_select_buffer()

Extract a helper out of io_send() for provided buffer selection to
improve readability as it has grown to take too many lines.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/26a769cdabd61af7f40c5d88a22469c5ad071796.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 87 +++++++++++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 37 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index dedf274fc049a..4d21f7bd2149e 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -566,6 +566,54 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 	return IOU_OK;
 }
 
+static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
+				 struct io_async_msghdr *kmsg)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+
+	int ret;
+	struct buf_sel_arg arg = {
+		.iovs = &kmsg->fast_iov,
+		.max_len = min_not_zero(sr->len, INT_MAX),
+		.nr_iovs = 1,
+	};
+
+	if (kmsg->free_iov) {
+		arg.nr_iovs = kmsg->free_iov_nr;
+		arg.iovs = kmsg->free_iov;
+		arg.mode = KBUF_MODE_FREE;
+	}
+
+	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
+		arg.nr_iovs = 1;
+	else
+		arg.mode |= KBUF_MODE_EXPAND;
+
+	ret = io_buffers_select(req, &arg, issue_flags);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+		kmsg->free_iov_nr = ret;
+		kmsg->free_iov = arg.iovs;
+		req->flags |= REQ_F_NEED_CLEANUP;
+	}
+	sr->len = arg.out_len;
+
+	if (ret == 1) {
+		sr->buf = arg.iovs[0].iov_base;
+		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
+					&kmsg->msg.msg_iter);
+		if (unlikely(ret))
+			return ret;
+	} else {
+		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
+				arg.iovs, ret, arg.out_len);
+	}
+
+	return 0;
+}
+
 int io_send(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -589,44 +637,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 
 retry_bundle:
 	if (io_do_buffer_select(req)) {
-		struct buf_sel_arg arg = {
-			.iovs = &kmsg->fast_iov,
-			.max_len = min_not_zero(sr->len, INT_MAX),
-			.nr_iovs = 1,
-		};
-
-		if (kmsg->free_iov) {
-			arg.nr_iovs = kmsg->free_iov_nr;
-			arg.iovs = kmsg->free_iov;
-			arg.mode = KBUF_MODE_FREE;
-		}
-
-		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
-			arg.nr_iovs = 1;
-		else
-			arg.mode |= KBUF_MODE_EXPAND;
-
-		ret = io_buffers_select(req, &arg, issue_flags);
-		if (unlikely(ret < 0))
+		ret = io_send_select_buffer(req, issue_flags, kmsg);
+		if (ret)
 			return ret;
-
-		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
-			kmsg->free_iov_nr = ret;
-			kmsg->free_iov = arg.iovs;
-			req->flags |= REQ_F_NEED_CLEANUP;
-		}
-		sr->len = arg.out_len;
-
-		if (ret == 1) {
-			sr->buf = arg.iovs[0].iov_base;
-			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
-						&kmsg->msg.msg_iter);
-			if (unlikely(ret))
-				return ret;
-		} else {
-			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
-					arg.iovs, ret, arg.out_len);
-		}
 	}
 
 	/*

From 0d124578fed92cadeaca47d734da782beacdc1a7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:15 +0000
Subject: [PATCH 254/368] io_uring: remove !KASAN guards from cache free

Test setups (with KASAN) will avoid !KASAN sections, and so it's not
testing paths that would be exercised otherwise. That's bad as to be
sure that your code works you now have to specifically test both KASAN
and !KASAN configs.

Remove !CONFIG_KASAN guards from io_netmsg_cache_free() and
io_rw_cache_free(). The free functions should always be getting valid
entries, and even though for KASAN iovecs should already be cleared,
that's better than skipping the chunks completely.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/d6078a51c7137a243f9d00849bc3daa660873209.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 2 --
 io_uring/rw.c  | 2 --
 2 files changed, 4 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 4d21f7bd2149e..d89c39f853e39 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1813,10 +1813,8 @@ void io_netmsg_cache_free(const void *entry)
 {
 	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
 
-#if !defined(CONFIG_KASAN)
 	if (kmsg->free_iov)
 		io_netmsg_iovec_free(kmsg);
-#endif
 	kfree(kmsg);
 }
 #endif
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 991ecfbea88e3..c496f195aae2b 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1309,9 +1309,7 @@ void io_rw_cache_free(const void *entry)
 {
 	struct io_async_rw *rw = (struct io_async_rw *) entry;
 
-#if !defined(CONFIG_KASAN)
 	if (rw->free_iovec)
 		io_rw_iovec_free(rw);
-#endif
 	kfree(rw);
 }

From d1fdab8c06791945d9454fb430951533eba9e175 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 28 Jan 2025 20:56:16 +0000
Subject: [PATCH 255/368] io_uring/rw: simplify io_rw_recycle()

Instead of freeing iovecs in case of IO_URING_F_UNLOCKED in
io_rw_recycle(), leave it be and rely on the core io_uring code to
call io_readv_writev_cleanup() later. This way the iovec will get
recycled and we can clean up io_rw_recycle() and kill
io_rw_iovec_free().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/14f83b112eb40078bea18e15d77a4f99fc981a44.1738087204.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rw.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index c496f195aae2b..7aa1e4c9f64a3 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -146,23 +146,13 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req,
 	return 0;
 }
 
-static void io_rw_iovec_free(struct io_async_rw *rw)
-{
-	if (rw->free_iovec) {
-		kfree(rw->free_iovec);
-		rw->free_iov_nr = 0;
-		rw->free_iovec = NULL;
-	}
-}
-
 static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_async_rw *rw = req->async_data;
 
-	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
-		io_rw_iovec_free(rw);
+	if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
 		return;
-	}
+
 	io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr);
 	if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) {
 		req->async_data = NULL;
@@ -1310,6 +1300,6 @@ void io_rw_cache_free(const void *entry)
 	struct io_async_rw *rw = (struct io_async_rw *) entry;
 
 	if (rw->free_iovec)
-		io_rw_iovec_free(rw);
+		kfree(rw->free_iovec);
 	kfree(rw);
 }

From 97274527e8dc709bbb4c7cb44279a12d085da9ef Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 27 Jan 2025 17:27:28 +0100
Subject: [PATCH 256/368] rtc: pcf2127: add BSM support

The pcf2127 encodes BSM, BLD and power fail detection in the same set of
bits so it is necessary to do some calculation when changing BSM to keep
the rest of the configuration as-is. However, when BSM is disabled, there
is no configuration with BLD enabled so this will be lost when coming back
to a mode with BSM enabled.

Link: https://lore.kernel.org/r/20250127162728.86234-1-alexandre.belloni@bootlin.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf2127.c | 82 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 9c04c4e1a49c3..31c7dca8f4692 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -20,6 +20,7 @@
 #include <linux/i2c.h>
 #include <linux/spi/spi.h>
 #include <linux/bcd.h>
+#include <linux/bitfield.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -48,6 +49,7 @@
 #define PCF2127_BIT_CTRL3_BLF			BIT(2)
 #define PCF2127_BIT_CTRL3_BF			BIT(3)
 #define PCF2127_BIT_CTRL3_BTSE			BIT(4)
+#define PCF2127_CTRL3_PM			GENMASK(7, 5)
 /* Time and date registers */
 #define PCF2127_REG_TIME_BASE		0x03
 #define PCF2127_BIT_SC_OSF			BIT(7)
@@ -331,6 +333,84 @@ static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	return 0;
 }
 
+static int pcf2127_param_get(struct device *dev, struct rtc_param *param)
+{
+	struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+	u32 value;
+	int ret;
+
+	switch (param->param) {
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value);
+		if (ret < 0)
+			return ret;
+
+		value = FIELD_GET(PCF2127_CTRL3_PM, value);
+
+		if (value < 0x3)
+			param->uvalue = RTC_BSM_LEVEL;
+		else if (value < 0x6)
+			param->uvalue = RTC_BSM_DIRECT;
+		else
+			param->uvalue = RTC_BSM_DISABLED;
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int pcf2127_param_set(struct device *dev, struct rtc_param *param)
+{
+	struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+	u8 mode = 0;
+	u32 value;
+	int ret;
+
+	switch (param->param) {
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value);
+		if (ret < 0)
+			return ret;
+
+		value = FIELD_GET(PCF2127_CTRL3_PM, value);
+
+		if (value > 5)
+			value -= 5;
+		else if (value > 2)
+			value -= 3;
+
+		switch (param->uvalue) {
+		case RTC_BSM_LEVEL:
+			break;
+		case RTC_BSM_DIRECT:
+			mode = 3;
+			break;
+		case RTC_BSM_DISABLED:
+			if (value == 0)
+				value = 1;
+			mode = 5;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		return regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL3,
+					  PCF2127_CTRL3_PM,
+					  FIELD_PREP(PCF2127_CTRL3_PM, mode + value));
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int pcf2127_rtc_ioctl(struct device *dev,
 				unsigned int cmd, unsigned long arg)
 {
@@ -741,6 +821,8 @@ static const struct rtc_class_ops pcf2127_rtc_ops = {
 	.read_alarm       = pcf2127_rtc_read_alarm,
 	.set_alarm        = pcf2127_rtc_set_alarm,
 	.alarm_irq_enable = pcf2127_rtc_alarm_irq_enable,
+	.param_get        = pcf2127_param_get,
+	.param_set        = pcf2127_param_set,
 };
 
 /* sysfs interface */

From b865a8404642279e53644fc7288d172afd6a170e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 28 Jan 2025 13:21:23 +0530
Subject: [PATCH 257/368] cpufreq: airoha: Depends on OF

The Airoha cpufreq depends on OF and must be marked as such. With the
kernel compiled without OF support, we get the following warning:

drivers/cpufreq/airoha-cpufreq.c:109:34: warning: 'airoha_cpufreq_match_list' defined but not used [-Wunused-const-variable=]
    109 | static const struct of_device_id airoha_cpufreq_match_list[] __initconst = {
        |                                  ^~~~~~~~~~~~~~~~~~~~~~~~~

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501251941.0fXlcd1D-lkp@intel.com/
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/455e18c947bd9529701a2f1c796f0f934d1354d7.1738050679.git.viresh.kumar@linaro.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig.arm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 704e84d006390..0ee5c691fb36b 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -17,7 +17,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
 
 config ARM_AIROHA_SOC_CPUFREQ
 	tristate "Airoha EN7581 SoC CPUFreq support"
-	depends on ARCH_AIROHA || COMPILE_TEST
+	depends on (ARCH_AIROHA && OF) || COMPILE_TEST
 	select PM_OPP
 	default ARCH_AIROHA
 	help

From 3775fc538f535a7c5adaf11990c7932a0bd1f9eb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Jan 2025 20:24:41 +0100
Subject: [PATCH 258/368] PM: sleep: core: Synchronize runtime PM status of
 parents and children

Commit 6e176bf8d461 ("PM: sleep: core: Do not skip callbacks in the
resume phase") overlooked the case in which the parent of a device with
DPM_FLAG_SMART_SUSPEND set did not use that flag and could be runtime-
suspended before a transition into a system-wide sleep state.  In that
case, if the child is resumed during the subsequent transition from
that state into the working state, its runtime PM status will be set to
RPM_ACTIVE, but the runtime PM status of the parent will not be updated
accordingly, even though the parent will be resumed too, because of the
dev_pm_skip_suspend() check in device_resume_noirq().

Address this problem by tracking the need to set the runtime PM status
to RPM_ACTIVE during system-wide resume transitions for devices with
DPM_FLAG_SMART_SUSPEND set and all of the devices depended on by them.

Fixes: 6e176bf8d461 ("PM: sleep: core: Do not skip callbacks in the resume phase")
Closes: https://lore.kernel.org/linux-pm/Z30p2Etwf3F2AUvD@hovoldconsulting.com/
Reported-by: Johan Hovold <johan@kernel.org>
Tested-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
Tested-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://patch.msgid.link/12619233.O9o76ZdvQC@rjwysocki.net
---
 drivers/base/power/main.c | 29 ++++++++++++++++++++---------
 include/linux/pm.h        |  1 +
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index cbc9a7a75def7..d497d448e4b2a 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -656,13 +656,15 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy
 	 * so change its status accordingly.
 	 *
 	 * Otherwise, the device is going to be resumed, so set its PM-runtime
-	 * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set
-	 * to avoid confusing drivers that don't use it.
+	 * status to "active" unless its power.set_active flag is clear, in
+	 * which case it is not necessary to update its PM-runtime status.
 	 */
-	if (skip_resume)
+	if (skip_resume) {
 		pm_runtime_set_suspended(dev);
-	else if (dev_pm_skip_suspend(dev))
+	} else if (dev->power.set_active) {
 		pm_runtime_set_active(dev);
+		dev->power.set_active = false;
+	}
 
 	if (dev->pm_domain) {
 		info = "noirq power domain ";
@@ -1189,18 +1191,24 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 	return PMSG_ON;
 }
 
-static void dpm_superior_set_must_resume(struct device *dev)
+static void dpm_superior_set_must_resume(struct device *dev, bool set_active)
 {
 	struct device_link *link;
 	int idx;
 
-	if (dev->parent)
+	if (dev->parent) {
 		dev->parent->power.must_resume = true;
+		if (set_active)
+			dev->parent->power.set_active = true;
+	}
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
 		link->supplier->power.must_resume = true;
+		if (set_active)
+			link->supplier->power.set_active = true;
+	}
 
 	device_links_read_unlock(idx);
 }
@@ -1278,8 +1286,11 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy
 	      dev->power.may_skip_resume))
 		dev->power.must_resume = true;
 
-	if (dev->power.must_resume)
-		dpm_superior_set_must_resume(dev);
+	if (dev->power.must_resume) {
+		dev->power.set_active = dev->power.set_active ||
+			dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
+		dpm_superior_set_must_resume(dev, dev->power.set_active);
+	}
 
 Complete:
 	complete_all(&dev->power.completion);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 0627a795892be..0d2597a76dfcc 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -679,6 +679,7 @@ struct dev_pm_info {
 	bool			no_pm_callbacks:1;	/* Owned by the PM core */
 	bool			async_in_progress:1;	/* Owned by the PM core */
 	bool			must_resume:1;		/* Owned by the PM core */
+	bool			set_active:1;		/* Owned by the PM core */
 	bool			may_skip_resume:1;	/* Set by subsystems */
 #else
 	bool			should_wakeup:1;

From fe6628608627424fb4a6d4c8d2235822457c5d9c Mon Sep 17 00:00:00 2001
From: Nilay Shroff <nilay@linux.ibm.com>
Date: Tue, 28 Jan 2025 20:04:13 +0530
Subject: [PATCH 259/368] block: get rid of request queue ->sysfs_dir_lock

The request queue uses ->sysfs_dir_lock for protecting the addition/
deletion of kobject entries under sysfs while we register/unregister
blk-mq. However kobject addition/deletion is already protected with
kernfs/sysfs internal synchronization primitives. So use of q->sysfs_
dir_lock seems redundant.

Moreover, q->sysfs_dir_lock is also used at a few other callsites along
with q->sysfs_lock for protecting the addition/deletion of kobjects.
One such example is when we register with sysfs a set of independent
access ranges for a disk. Here as well we could get rid of q->sysfs_
dir_lock and only use q->sysfs_lock.

The only variable which q->sysfs_dir_lock appears to protect is q->
mq_sysfs_init_done which is set/unset while registering/unregistering
blk-mq with sysfs. But use of q->mq_sysfs_init_done could be easily
replaced using queue registered bit QUEUE_FLAG_REGISTERED.

So with this patch we remove q->sysfs_dir_lock from each callsite
and replace q->mq_sysfs_init_done using QUEUE_FLAG_REGISTERED.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20250128143436.874357-2-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  1 -
 block/blk-ia-ranges.c  |  4 ----
 block/blk-mq-sysfs.c   | 23 +++++------------------
 block/blk-sysfs.c      |  5 -----
 include/linux/blkdev.h |  3 ---
 5 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 32fb28a6372cd..d6c4fa3943b5c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -430,7 +430,6 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 	refcount_set(&q->refs, 1);
 	mutex_init(&q->debugfs_mutex);
 	mutex_init(&q->sysfs_lock);
-	mutex_init(&q->sysfs_dir_lock);
 	mutex_init(&q->limits_lock);
 	mutex_init(&q->rq_qos_mutex);
 	spin_lock_init(&q->queue_lock);
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index c9eb4241e0483..d479f5481b66a 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -111,7 +111,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk)
 	struct request_queue *q = disk->queue;
 	int i, ret;
 
-	lockdep_assert_held(&q->sysfs_dir_lock);
 	lockdep_assert_held(&q->sysfs_lock);
 
 	if (!iars)
@@ -155,7 +154,6 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk)
 	struct blk_independent_access_ranges *iars = disk->ia_ranges;
 	int i;
 
-	lockdep_assert_held(&q->sysfs_dir_lock);
 	lockdep_assert_held(&q->sysfs_lock);
 
 	if (!iars)
@@ -289,7 +287,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
 {
 	struct request_queue *q = disk->queue;
 
-	mutex_lock(&q->sysfs_dir_lock);
 	mutex_lock(&q->sysfs_lock);
 	if (iars && !disk_check_ia_ranges(disk, iars)) {
 		kfree(iars);
@@ -313,6 +310,5 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
 		disk_register_independent_access_ranges(disk);
 unlock:
 	mutex_unlock(&q->sysfs_lock);
-	mutex_unlock(&q->sysfs_dir_lock);
 }
 EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 156e9bb07abf1..6113328abd708 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -223,8 +223,6 @@ int blk_mq_sysfs_register(struct gendisk *disk)
 	unsigned long i, j;
 	int ret;
 
-	lockdep_assert_held(&q->sysfs_dir_lock);
-
 	ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq");
 	if (ret < 0)
 		goto out;
@@ -237,7 +235,6 @@ int blk_mq_sysfs_register(struct gendisk *disk)
 			goto unreg;
 	}
 
-	q->mq_sysfs_init_done = true;
 
 out:
 	return ret;
@@ -259,15 +256,12 @@ void blk_mq_sysfs_unregister(struct gendisk *disk)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-	lockdep_assert_held(&q->sysfs_dir_lock);
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
 
 	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(q->mq_kobj);
-
-	q->mq_sysfs_init_done = false;
 }
 
 void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
@@ -275,15 +269,11 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-	mutex_lock(&q->sysfs_dir_lock);
-	if (!q->mq_sysfs_init_done)
-		goto unlock;
+	if (!blk_queue_registered(q))
+		return;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
-
-unlock:
-	mutex_unlock(&q->sysfs_dir_lock);
 }
 
 int blk_mq_sysfs_register_hctxs(struct request_queue *q)
@@ -292,9 +282,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
 	unsigned long i;
 	int ret = 0;
 
-	mutex_lock(&q->sysfs_dir_lock);
-	if (!q->mq_sysfs_init_done)
-		goto unlock;
+	if (!blk_queue_registered(q))
+		goto out;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
@@ -302,8 +291,6 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&q->sysfs_dir_lock);
-
+out:
 	return ret;
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e09b455874bfd..7b970e6765e72 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -764,7 +764,6 @@ int blk_register_queue(struct gendisk *disk)
 	struct request_queue *q = disk->queue;
 	int ret;
 
-	mutex_lock(&q->sysfs_dir_lock);
 	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
 	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
 	if (ret < 0)
@@ -805,7 +804,6 @@ int blk_register_queue(struct gendisk *disk)
 	if (q->elevator)
 		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
 	mutex_unlock(&q->sysfs_lock);
-	mutex_unlock(&q->sysfs_dir_lock);
 
 	/*
 	 * SCSI probing may synchronously create and destroy a lot of
@@ -830,7 +828,6 @@ int blk_register_queue(struct gendisk *disk)
 	mutex_unlock(&q->sysfs_lock);
 out_put_queue_kobj:
 	kobject_put(&disk->queue_kobj);
-	mutex_unlock(&q->sysfs_dir_lock);
 	return ret;
 }
 
@@ -861,7 +858,6 @@ void blk_unregister_queue(struct gendisk *disk)
 	blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
 	mutex_unlock(&q->sysfs_lock);
 
-	mutex_lock(&q->sysfs_dir_lock);
 	/*
 	 * Remove the sysfs attributes before unregistering the queue data
 	 * structures that can be modified through sysfs.
@@ -878,7 +874,6 @@ void blk_unregister_queue(struct gendisk *disk)
 	/* Now that we've deleted all child objects, we can delete the queue. */
 	kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
 	kobject_del(&disk->queue_kobj);
-	mutex_unlock(&q->sysfs_dir_lock);
 
 	blk_debugfs_remove(disk);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 76f0a4e7c2e5d..248416ecd01c9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -561,7 +561,6 @@ struct request_queue {
 	struct list_head	flush_list;
 
 	struct mutex		sysfs_lock;
-	struct mutex		sysfs_dir_lock;
 	struct mutex		limits_lock;
 
 	/*
@@ -605,8 +604,6 @@ struct request_queue {
 	 * Serializes all debugfs metadata operations using the above dentries.
 	 */
 	struct mutex		debugfs_mutex;
-
-	bool			mq_sysfs_init_done;
 };
 
 /* Keep blk_queue_flag_name[] in sync with the definitions below */

From 14ef49657ff3b7156952b2eadcf2e5bafd735795 Mon Sep 17 00:00:00 2001
From: Nilay Shroff <nilay@linux.ibm.com>
Date: Tue, 28 Jan 2025 20:04:14 +0530
Subject: [PATCH 260/368] block: fix nr_hw_queue update racing with disk
 addition/removal

The nr_hw_queue update could potentially race with disk addition/removal
while registering/unregistering hctx sysfs files. The __blk_mq_update_
nr_hw_queues() runs with q->tag_list_lock held and so to avoid it racing
with disk addition/removal we should acquire q->tag_list_lock while
registering/unregistering hctx sysfs files.

With this patch, blk_mq_sysfs_register() (called during disk addition)
and blk_mq_sysfs_unregister() (called during disk removal) now run
with q->tag_list_lock held so that they avoid racing with
__blk_mq_update_nr_hw_queues().

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20250128143436.874357-3-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sysfs.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 6113328abd708..3feeeccf8a992 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -225,25 +225,25 @@ int blk_mq_sysfs_register(struct gendisk *disk)
 
 	ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq");
 	if (ret < 0)
-		goto out;
+		return ret;
 
 	kobject_uevent(q->mq_kobj, KOBJ_ADD);
 
+	mutex_lock(&q->tag_set->tag_list_lock);
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)
-			goto unreg;
+			goto out_unreg;
 	}
+	mutex_unlock(&q->tag_set->tag_list_lock);
+	return 0;
 
-
-out:
-	return ret;
-
-unreg:
+out_unreg:
 	queue_for_each_hw_ctx(q, hctx, j) {
 		if (j < i)
 			blk_mq_unregister_hctx(hctx);
 	}
+	mutex_unlock(&q->tag_set->tag_list_lock);
 
 	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(q->mq_kobj);
@@ -256,9 +256,10 @@ void blk_mq_sysfs_unregister(struct gendisk *disk)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-
+	mutex_lock(&q->tag_set->tag_list_lock);
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
+	mutex_unlock(&q->tag_set->tag_list_lock);
 
 	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(q->mq_kobj);

From 438e2116d7bd3095184d1997b367380c4f465164 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 14 Oct 2024 13:00:48 +0200
Subject: [PATCH 261/368] cifs: Change translation of STATUS_PRIVILEGE_NOT_HELD
 to -EPERM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

STATUS_PRIVILEGE_NOT_HELD indicates that the user does not have the
privilege to perform some operation, for example to create a symlink.

Currently STATUS_PRIVILEGE_NOT_HELD is translated to -EIO. Change it to
-EPERM, which better describes this error code.

Note that there is no ERR* code usable in the ntstatus_to_dos_map[] table
that translates to -EPERM, so do an explicit translation in the
map_smb_to_linux_error() function.

Signed-off-by: Pali Rohár <pali@kernel.org>
Acked-by: Tom Talpey <tom@talpey.com>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/netmisc.c      | 3 ++-
 fs/smb/client/smb2maperror.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/netmisc.c b/fs/smb/client/netmisc.c
index 9dc16211e7a13..9ec20601cee2e 100644
--- a/fs/smb/client/netmisc.c
+++ b/fs/smb/client/netmisc.c
@@ -313,7 +313,6 @@ static const struct {
 	ERRDOS, 2215, NT_STATUS_NO_LOGON_SERVERS}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_LOGON_SESSION}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_PRIVILEGE}, {
-	ERRDOS, ERRnoaccess, NT_STATUS_PRIVILEGE_NOT_HELD}, {
 	ERRHRD, ERRgeneral, NT_STATUS_INVALID_ACCOUNT_NAME}, {
 	ERRHRD, ERRgeneral, NT_STATUS_USER_EXISTS},
 /*	{ This NT error code was 'sqashed'
@@ -876,6 +875,8 @@ map_smb_to_linux_error(char *buf, bool logErr)
 		__u32 err = le32_to_cpu(smb->Status.CifsError);
 		if (err == (NT_STATUS_NOT_A_REPARSE_POINT))
 			rc = -ENODATA;
+		else if (err == (NT_STATUS_PRIVILEGE_NOT_HELD))
+			rc = -EPERM;
 	}
 
 	cifs_dbg(FYI, "Mapping smb error code 0x%x to POSIX err %d\n",
diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c
index 612e7b5181b6c..12c2b868789fd 100644
--- a/fs/smb/client/smb2maperror.c
+++ b/fs/smb/client/smb2maperror.c
@@ -380,7 +380,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	{STATUS_NO_LOGON_SERVERS, -EIO, "STATUS_NO_LOGON_SERVERS"},
 	{STATUS_NO_SUCH_LOGON_SESSION, -EIO, "STATUS_NO_SUCH_LOGON_SESSION"},
 	{STATUS_NO_SUCH_PRIVILEGE, -EIO, "STATUS_NO_SUCH_PRIVILEGE"},
-	{STATUS_PRIVILEGE_NOT_HELD, -EIO, "STATUS_PRIVILEGE_NOT_HELD"},
+	{STATUS_PRIVILEGE_NOT_HELD, -EPERM, "STATUS_PRIVILEGE_NOT_HELD"},
 	{STATUS_INVALID_ACCOUNT_NAME, -EIO, "STATUS_INVALID_ACCOUNT_NAME"},
 	{STATUS_USER_EXISTS, -EIO, "STATUS_USER_EXISTS"},
 	{STATUS_NO_SUCH_USER, -EIO, "STATUS_NO_SUCH_USER"},

From ef201e8759d20bf82b5943101147072de12bc524 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Thu, 26 Dec 2024 15:20:39 +0100
Subject: [PATCH 262/368] cifs: Validate EAs for WSL reparse points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Major and minor numbers for char and block devices are mandatory for stat.
So check that the WSL EA $LXDEV is present for WSL CHR and BLK reparse
points.

The WSL reparse point tag determines the type of the file. But the file
type is also present in the WSL EA $LXMOD. So check that both file types
are the same.

Fixes: 78e26bec4d6d ("smb: client: parse uid, gid, mode and dev from WSL reparse points")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/reparse.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index d88b41133e00c..b387dfbaf16b0 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -747,11 +747,12 @@ int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
 	return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data);
 }
 
-static void wsl_to_fattr(struct cifs_open_info_data *data,
+static bool wsl_to_fattr(struct cifs_open_info_data *data,
 			 struct cifs_sb_info *cifs_sb,
 			 u32 tag, struct cifs_fattr *fattr)
 {
 	struct smb2_file_full_ea_info *ea;
+	bool have_xattr_dev = false;
 	u32 next = 0;
 
 	switch (tag) {
@@ -794,13 +795,24 @@ static void wsl_to_fattr(struct cifs_open_info_data *data,
 			fattr->cf_uid = wsl_make_kuid(cifs_sb, v);
 		else if (!strncmp(name, SMB2_WSL_XATTR_GID, nlen))
 			fattr->cf_gid = wsl_make_kgid(cifs_sb, v);
-		else if (!strncmp(name, SMB2_WSL_XATTR_MODE, nlen))
+		else if (!strncmp(name, SMB2_WSL_XATTR_MODE, nlen)) {
+			/* File type in reparse point tag and in xattr mode must match. */
+			if (S_DT(fattr->cf_mode) != S_DT(le32_to_cpu(*(__le32 *)v)))
+				return false;
 			fattr->cf_mode = (umode_t)le32_to_cpu(*(__le32 *)v);
-		else if (!strncmp(name, SMB2_WSL_XATTR_DEV, nlen))
+		} else if (!strncmp(name, SMB2_WSL_XATTR_DEV, nlen)) {
 			fattr->cf_rdev = reparse_mkdev(v);
+			have_xattr_dev = true;
+		}
 	} while (next);
 out:
+
+	/* Major and minor numbers for char and block devices are mandatory. */
+	if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK))
+		return false;
+
 	fattr->cf_dtype = S_DT(fattr->cf_mode);
+	return true;
 }
 
 static bool posix_reparse_to_fattr(struct cifs_sb_info *cifs_sb,
@@ -874,7 +886,9 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
 	case IO_REPARSE_TAG_AF_UNIX:
 	case IO_REPARSE_TAG_LX_CHR:
 	case IO_REPARSE_TAG_LX_BLK:
-		wsl_to_fattr(data, cifs_sb, tag, fattr);
+		ok = wsl_to_fattr(data, cifs_sb, tag, fattr);
+		if (!ok)
+			return false;
 		break;
 	case IO_REPARSE_TAG_NFS:
 		ok = posix_reparse_to_fattr(cifs_sb, fattr, data);

From 25f6184e24b3991eae977a29ecf27d537cc930b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Wed, 25 Dec 2024 14:00:39 +0100
Subject: [PATCH 263/368] cifs: Remove intermediate object of failed create SFU
 call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check if the server honored the ATTR_SYSTEM flag set by the
CREATE_OPTION_SPECIAL option. If not, then the server does not support
ATTR_SYSTEM and the newly created file is not SFU compatible, which means
that the call failed.

If CREATE was successful but either setting ATTR_SYSTEM failed or
writing type/data information failed, then remove the intermediate
object created by CREATE. Otherwise the intermediate empty object stays
on the server.

This ensures that if creating SFU files with the system attribute is
unsupported by the server, then no empty file stays on the server as a
result of the unsupported operation.

This is, for example, the case with a Samba server and Linux tmpfs storage
without xattr support enabled (where Samba stores the ATTR_SYSTEM bit).

Cc: stable@vger.kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/smb2ops.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index d640dcabc305e..77309217dab45 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -5077,6 +5077,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct cifs_open_parms oparms;
+	struct cifs_open_info_data idata;
 	struct cifs_io_parms io_parms = {};
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifs_fid fid;
@@ -5146,10 +5147,20 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
 			     CREATE_OPTION_SPECIAL, ACL_NO_MODE);
 	oparms.fid = &fid;
 
-	rc = server->ops->open(xid, &oparms, &oplock, NULL);
+	rc = server->ops->open(xid, &oparms, &oplock, &idata);
 	if (rc)
 		goto out;
 
+	/*
+	 * Check if the server honored ATTR_SYSTEM flag by CREATE_OPTION_SPECIAL
+	 * option. If not then server does not support ATTR_SYSTEM and newly
+	 * created file is not SFU compatible, which means that the call failed.
+	 */
+	if (!(le32_to_cpu(idata.fi.Attributes) & ATTR_SYSTEM)) {
+		rc = -EOPNOTSUPP;
+		goto out_close;
+	}
+
 	if (type_len + data_len > 0) {
 		io_parms.pid = current->tgid;
 		io_parms.tcon = tcon;
@@ -5164,8 +5175,18 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
 					     iov, ARRAY_SIZE(iov)-1);
 	}
 
+out_close:
 	server->ops->close(xid, tcon, &fid);
 
+	/*
+	 * If CREATE was successful but either setting ATTR_SYSTEM failed or
+	 * writing type/data information failed then remove the intermediate
+	 * object created by CREATE. Otherwise intermediate empty object stay
+	 * on the server.
+	 */
+	if (rc)
+		server->ops->unlink(xid, tcon, full_path, cifs_sb, NULL);
+
 out:
 	kfree(symname_utf16);
 	return rc;

From 8b19dfb34d17e77a0809d433cc128b779282131b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 14 Oct 2024 13:43:23 +0200
Subject: [PATCH 264/368] cifs: Fix getting and setting SACLs over SMB1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SMB1 callback get_cifs_acl_by_fid() currently ignores its last argument and
therefore ignores request for SACL_SECINFO. Fix this issue by correctly
propagating info argument from get_cifs_acl() and get_cifs_acl_by_fid() to
CIFSSMBGetCIFSACL() function and pass SACL_SECINFO when requested.

To access SACLs, the object must be opened with SYSTEM_SECURITY
access. Pass this flag when trying to get or set SACLs.

Same logic is in the SMB2+ code path.

This change fixes getting and setting of "system.cifs_ntsd_full" and
"system.smb3_ntsd_full" xattrs over SMB1, as the SACL part of the passed
xattr buffer was previously silently ignored.

Fixes: 3970acf7ddb9 ("SMB3: Add support for getting and setting SACLs")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsacl.c   | 25 +++++++++++++++----------
 fs/smb/client/cifsproto.h |  2 +-
 fs/smb/client/cifssmb.c   |  4 ++--
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c
index ba79aa2107cc9..699a3f76d0834 100644
--- a/fs/smb/client/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
@@ -1395,7 +1395,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd,
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
 				      const struct cifs_fid *cifsfid, u32 *pacllen,
-				      u32 __maybe_unused unused)
+				      u32 info)
 {
 	struct smb_ntsd *pntsd = NULL;
 	unsigned int xid;
@@ -1407,7 +1407,7 @@ struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
 
 	xid = get_xid();
 	rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), cifsfid->netfid, &pntsd,
-				pacllen);
+				pacllen, info);
 	free_xid(xid);
 
 	cifs_put_tlink(tlink);
@@ -1419,7 +1419,7 @@ struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
 }
 
 static struct smb_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
-		const char *path, u32 *pacllen)
+		const char *path, u32 *pacllen, u32 info)
 {
 	struct smb_ntsd *pntsd = NULL;
 	int oplock = 0;
@@ -1446,9 +1446,12 @@ static struct smb_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
 		.fid = &fid,
 	};
 
+	if (info & SACL_SECINFO)
+		oparms.desired_access |= SYSTEM_SECURITY;
+
 	rc = CIFS_open(xid, &oparms, &oplock, NULL);
 	if (!rc) {
-		rc = CIFSSMBGetCIFSACL(xid, tcon, fid.netfid, &pntsd, pacllen);
+		rc = CIFSSMBGetCIFSACL(xid, tcon, fid.netfid, &pntsd, pacllen, info);
 		CIFSSMBClose(xid, tcon, fid.netfid);
 	}
 
@@ -1472,7 +1475,7 @@ struct smb_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 	if (inode)
 		open_file = find_readable_file(CIFS_I(inode), true);
 	if (!open_file)
-		return get_cifs_acl_by_path(cifs_sb, path, pacllen);
+		return get_cifs_acl_by_path(cifs_sb, path, pacllen, info);
 
 	pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info);
 	cifsFileInfo_put(open_file);
@@ -1485,7 +1488,7 @@ int set_cifs_acl(struct smb_ntsd *pnntsd, __u32 acllen,
 {
 	int oplock = 0;
 	unsigned int xid;
-	int rc, access_flags;
+	int rc, access_flags = 0;
 	struct cifs_tcon *tcon;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
@@ -1498,10 +1501,12 @@ int set_cifs_acl(struct smb_ntsd *pnntsd, __u32 acllen,
 	tcon = tlink_tcon(tlink);
 	xid = get_xid();
 
-	if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP)
-		access_flags = WRITE_OWNER;
-	else
-		access_flags = WRITE_DAC;
+	if (aclflag & CIFS_ACL_OWNER || aclflag & CIFS_ACL_GROUP)
+		access_flags |= WRITE_OWNER;
+	if (aclflag & CIFS_ACL_SACL)
+		access_flags |= SYSTEM_SECURITY;
+	if (aclflag & CIFS_ACL_DACL)
+		access_flags |= WRITE_DAC;
 
 	oparms = (struct cifs_open_parms) {
 		.tcon = tcon,
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 223e5e231f428..fcc9da838b70f 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -557,7 +557,7 @@ extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
 		const struct nls_table *nls_codepage,
 		struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon,
-			__u16 fid, struct smb_ntsd **acl_inf, __u32 *buflen);
+			__u16 fid, struct smb_ntsd **acl_inf, __u32 *buflen, __u32 info);
 extern int CIFSSMBSetCIFSACL(const unsigned int, struct cifs_tcon *, __u16,
 			struct smb_ntsd *pntsd, __u32 len, int aclflag);
 extern int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 7f1cacc89dbb0..3feaa0f681699 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -3369,7 +3369,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
 CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
-		  struct smb_ntsd **acl_inf, __u32 *pbuflen)
+		  struct smb_ntsd **acl_inf, __u32 *pbuflen, __u32 info)
 {
 	int rc = 0;
 	int buf_type = 0;
@@ -3392,7 +3392,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
 	pSMB->MaxSetupCount = 0;
 	pSMB->Fid = fid; /* file handle always le */
 	pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
-				     CIFS_ACL_DACL);
+				     CIFS_ACL_DACL | info);
 	pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
 	inc_rfc1001_len(pSMB, 11);
 	iov[0].iov_base = (char *)pSMB;

From 24cf72976acee4b731c7c3ef2080e8535441fa3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sun, 6 Oct 2024 19:44:50 +0200
Subject: [PATCH 265/368] cifs: Remove unicode parameter from
 parse_reparse_point() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This parameter is always true, so remove it and also remove dead code which
is never called (for all false code paths).

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsproto.h |  2 +-
 fs/smb/client/reparse.c   | 25 +++++++++++--------------
 fs/smb/client/smb1ops.c   |  2 +-
 fs/smb/client/smb2file.c  |  1 -
 fs/smb/client/smb2proto.h |  2 +-
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index fcc9da838b70f..81680001944df 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -656,7 +656,7 @@ char *extract_sharename(const char *unc);
 int parse_reparse_point(struct reparse_data_buffer *buf,
 			u32 plen, struct cifs_sb_info *cifs_sb,
 			const char *full_path,
-			bool unicode, struct cifs_open_info_data *data);
+			struct cifs_open_info_data *data);
 int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
 			 struct dentry *dentry, struct cifs_tcon *tcon,
 			 const char *full_path, umode_t mode, dev_t dev,
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index b387dfbaf16b0..0e47b8e097a09 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -536,7 +536,7 @@ static int parse_reparse_posix(struct reparse_posix_data *buf,
 }
 
 int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
-			      bool unicode, bool relative,
+			      bool relative,
 			      const char *full_path,
 			      struct cifs_sb_info *cifs_sb)
 {
@@ -547,26 +547,24 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 	int rc;
 	int i;
 
-	/* Check that length it valid for unicode/non-unicode mode */
-	if (!len || (unicode && (len % 2))) {
+	/* Check that length it valid */
+	if (!len || (len % 2)) {
 		cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
 		rc = -EIO;
 		goto out;
 	}
 
 	/*
-	 * Check that buffer does not contain UTF-16 null codepoint in unicode
-	 * mode or null byte in non-unicode mode because Linux cannot process
-	 * symlink with null byte.
+	 * Check that buffer does not contain UTF-16 null codepoint
+	 * because Linux cannot process symlink with null byte.
 	 */
-	if ((unicode && UniStrnlen((wchar_t *)buf, len/2) != len/2) ||
-	    (!unicode && strnlen(buf, len) != len)) {
+	if (UniStrnlen((wchar_t *)buf, len/2) != len/2) {
 		cifs_dbg(VFS, "srv returned null byte in native symlink target location\n");
 		rc = -EIO;
 		goto out;
 	}
 
-	smb_target = cifs_strndup_from_utf16(buf, len, unicode, cifs_sb->local_nls);
+	smb_target = cifs_strndup_from_utf16(buf, len, true, cifs_sb->local_nls);
 	if (!smb_target) {
 		rc = -ENOMEM;
 		goto out;
@@ -621,7 +619,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 }
 
 static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym,
-				 u32 plen, bool unicode,
+				 u32 plen,
 				 struct cifs_sb_info *cifs_sb,
 				 const char *full_path,
 				 struct cifs_open_info_data *data)
@@ -641,7 +639,6 @@ static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym,
 	return smb2_parse_native_symlink(&data->symlink_target,
 					 sym->PathBuffer + offs,
 					 len,
-					 unicode,
 					 le32_to_cpu(sym->Flags) & SYMLINK_FLAG_RELATIVE,
 					 full_path,
 					 cifs_sb);
@@ -696,7 +693,7 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf
 int parse_reparse_point(struct reparse_data_buffer *buf,
 			u32 plen, struct cifs_sb_info *cifs_sb,
 			const char *full_path,
-			bool unicode, struct cifs_open_info_data *data)
+			struct cifs_open_info_data *data)
 {
 	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
@@ -710,7 +707,7 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
 	case IO_REPARSE_TAG_SYMLINK:
 		return parse_reparse_symlink(
 			(struct reparse_symlink_data_buffer *)buf,
-			plen, unicode, cifs_sb, full_path, data);
+			plen, cifs_sb, full_path, data);
 	case IO_REPARSE_TAG_LX_SYMLINK:
 		return parse_reparse_wsl_symlink(
 			(struct reparse_wsl_symlink_data_buffer *)buf,
@@ -744,7 +741,7 @@ int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
 
 	buf = (struct reparse_data_buffer *)((u8 *)io +
 					     le32_to_cpu(io->OutputOffset));
-	return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data);
+	return parse_reparse_point(buf, plen, cifs_sb, full_path, data);
 }
 
 static bool wsl_to_fattr(struct cifs_open_info_data *data,
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 749a83cd0deb0..55014c22f0828 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -1010,7 +1010,7 @@ static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb,
 
 	buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol +
 					     le32_to_cpu(io->DataOffset));
-	return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data);
+	return parse_reparse_point(buf, plen, cifs_sb, full_path, data);
 }
 
 static bool
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index 9ec44eab8dbca..c5e689b2fc497 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -89,7 +89,6 @@ int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec
 	return smb2_parse_native_symlink(path,
 					 (char *)sym->PathBuffer + sub_offs,
 					 sub_len,
-					 true,
 					 le32_to_cpu(sym->Flags) & SYMLINK_FLAG_RELATIVE,
 					 full_path,
 					 cifs_sb);
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 09349fa8da039..10f5e37d15309 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -112,7 +112,7 @@ extern int smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 			  const unsigned char *path, char *pbuf,
 			  unsigned int *pbytes_read);
 int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
-			      bool unicode, bool relative,
+			      bool relative,
 			      const char *full_path,
 			      struct cifs_sb_info *cifs_sb);
 int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb,

From 65ccccee4eb1a8147a3242238f7730bd8359077e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sat, 28 Sep 2024 14:13:44 +0200
Subject: [PATCH 266/368] cifs: Remove struct reparse_posix_data from struct
 cifs_open_info_data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Linux SMB client already supports more reparse point types but only the
reparse_posix_data is defined in union of struct cifs_open_info_data.
This union is currently used as an implicit cast between reparse point
types.

This code style hides the fact that the union is used for pointer casting,
and that it is used only in the mknod_nfs() and posix_reparse_to_fattr()
functions.

Other reparse point buffers do not use this kind of casting. So remove
reparse_posix_data from reparse part of struct cifs_open_info_data and for
all cases of reparse buffer use just struct reparse_data_buffer *buf.

Signed-off-by: Pali Rohár <pali@kernel.org>
Reviewed-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsglob.h | 5 +----
 fs/smb/client/reparse.c  | 5 ++---
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 49ffc040f736c..63f23a845e15d 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -226,10 +226,7 @@ struct cifs_open_info_data {
 			struct kvec iov;
 		} io;
 		__u32 tag;
-		union {
-			struct reparse_data_buffer *buf;
-			struct reparse_posix_data *posix;
-		};
+		struct reparse_data_buffer *buf;
 	} reparse;
 	struct {
 		__u8		eas[SMB2_WSL_MAX_QUERY_EA_RESP_SIZE];
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 0e47b8e097a09..77f891f718c0c 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -294,7 +294,7 @@ static int mknod_nfs(unsigned int xid, struct inode *inode,
 
 	data = (struct cifs_open_info_data) {
 		.reparse_point = true,
-		.reparse = { .tag = IO_REPARSE_TAG_NFS, .posix = p, },
+		.reparse = { .tag = IO_REPARSE_TAG_NFS, .buf = (struct reparse_data_buffer *)p, },
 	};
 
 	new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
@@ -816,8 +816,7 @@ static bool posix_reparse_to_fattr(struct cifs_sb_info *cifs_sb,
 				   struct cifs_fattr *fattr,
 				   struct cifs_open_info_data *data)
 {
-	struct reparse_posix_data *buf = data->reparse.posix;
-
+	struct reparse_posix_data *buf = (struct reparse_posix_data *)data->reparse.buf;
 
 	if (buf == NULL)
 		return true;

From b6d002f0a345218edbe9de049693004482a81327 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Fri, 13 Sep 2024 11:46:35 +0200
Subject: [PATCH 267/368] cifs: Rename struct reparse_posix_data to
 reparse_nfs_data_buffer and move to common/smb2pdu.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Function parse_reparse_posix() parses NFS-style reparse points, which are
used only by Windows NFS server since Windows Server 2012 version. This
style is not understood by Microsoft POSIX/Interix/SFU/SUA subsystems.

So make it clear that parse_reparse_posix() function and reparse_posix_data
structure are not POSIX general, but rather NFS specific.

All reparse buffer structures are defined in common/smb2pdu.h and have
_buffer suffix. So move struct reparse_posix_data from client/cifspdu.h to
common/smb2pdu.h and rename it to reparse_nfs_data_buffer for consistency.
Note that also SMB specification in [MS-FSCC] document, section 2.1.2.6
defines it under name "Network File System (NFS) Reparse Data Buffer".
So use this name for consistency.

Having this structure in common/smb2pdu.h can be useful for ksmbd server
code as NFS-style reparse points is the preferred way for implementing
support for special files.

Signed-off-by: Pali Rohár <pali@kernel.org>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
Reviewed-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifspdu.h | 14 --------------
 fs/smb/client/reparse.c | 12 ++++++------
 fs/smb/common/smb2pdu.h | 14 +++++++++++++-
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 5c047b00516f2..c285f0e5ce0f0 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -1484,20 +1484,6 @@ struct file_notify_information {
 	__u8  FileName[];
 } __attribute__((packed));
 
-/* For IO_REPARSE_TAG_NFS */
-#define NFS_SPECFILE_LNK	0x00000000014B4E4C
-#define NFS_SPECFILE_CHR	0x0000000000524843
-#define NFS_SPECFILE_BLK	0x00000000004B4C42
-#define NFS_SPECFILE_FIFO	0x000000004F464946
-#define NFS_SPECFILE_SOCK	0x000000004B434F53
-struct reparse_posix_data {
-	__le32	ReparseTag;
-	__le16	ReparseDataLength;
-	__u16	Reserved;
-	__le64	InodeType; /* LNK, FIFO, CHR etc. */
-	__u8	DataBuffer[];
-} __attribute__((packed));
-
 struct cifs_quota_data {
 	__u32	rsrvd1;  /* 0 */
 	__u32	sid_size;
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 77f891f718c0c..3be2173a026d0 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -242,7 +242,7 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb,
 	return 0;
 }
 
-static int nfs_set_reparse_buf(struct reparse_posix_data *buf,
+static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 			       mode_t mode, dev_t dev,
 			       struct kvec *iov)
 {
@@ -281,13 +281,13 @@ static int mknod_nfs(unsigned int xid, struct inode *inode,
 		     const char *full_path, umode_t mode, dev_t dev)
 {
 	struct cifs_open_info_data data;
-	struct reparse_posix_data *p;
+	struct reparse_nfs_data_buffer *p;
 	struct inode *new;
 	struct kvec iov;
 	__u8 buf[sizeof(*p) + sizeof(__le64)];
 	int rc;
 
-	p = (struct reparse_posix_data *)buf;
+	p = (struct reparse_nfs_data_buffer *)buf;
 	rc = nfs_set_reparse_buf(p, mode, dev, &iov);
 	if (rc)
 		return rc;
@@ -474,7 +474,7 @@ int smb2_mknod_reparse(unsigned int xid, struct inode *inode,
 }
 
 /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
-static int parse_reparse_posix(struct reparse_posix_data *buf,
+static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf,
 			       struct cifs_sb_info *cifs_sb,
 			       struct cifs_open_info_data *data)
 {
@@ -702,7 +702,7 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
 	/* See MS-FSCC 2.1.2 */
 	switch (le32_to_cpu(buf->ReparseTag)) {
 	case IO_REPARSE_TAG_NFS:
-		return parse_reparse_posix((struct reparse_posix_data *)buf,
+		return parse_reparse_nfs((struct reparse_nfs_data_buffer *)buf,
 					   cifs_sb, data);
 	case IO_REPARSE_TAG_SYMLINK:
 		return parse_reparse_symlink(
@@ -816,7 +816,7 @@ static bool posix_reparse_to_fattr(struct cifs_sb_info *cifs_sb,
 				   struct cifs_fattr *fattr,
 				   struct cifs_open_info_data *data)
 {
-	struct reparse_posix_data *buf = (struct reparse_posix_data *)data->reparse.buf;
+	struct reparse_nfs_data_buffer *buf = (struct reparse_nfs_data_buffer *)data->reparse.buf;
 
 	if (buf == NULL)
 		return true;
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 3c7c706c797d2..3336df2ea5d4a 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1550,7 +1550,19 @@ struct reparse_symlink_data_buffer {
 	__u8	PathBuffer[]; /* Variable Length */
 } __packed;
 
-/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
+/* For IO_REPARSE_TAG_NFS - see MS-FSCC 2.1.2.6 */
+#define NFS_SPECFILE_LNK	0x00000000014B4E4C
+#define NFS_SPECFILE_CHR	0x0000000000524843
+#define NFS_SPECFILE_BLK	0x00000000004B4C42
+#define NFS_SPECFILE_FIFO	0x000000004F464946
+#define NFS_SPECFILE_SOCK	0x000000004B434F53
+struct reparse_nfs_data_buffer {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
+	__u16	Reserved;
+	__le64	InodeType; /* NFS_SPECFILE_* */
+	__u8	DataBuffer[];
+} __packed;
 
 /* For IO_REPARSE_TAG_LX_SYMLINK */
 struct reparse_wsl_symlink_data_buffer {

From a46221fcdd40a29eb08900221797ad63d0271118 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Tue, 24 Dec 2024 15:31:22 +0100
Subject: [PATCH 268/368] cifs: Update description about ACL permissions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The descriptions of the individual SMB permission constants contain some
incorrect information (e.g. that WRITE_DAC can change ownership) and
incomplete information for distinguishing between ACL types (discretionary
vs system). Information about how permissions apply to directory objects
and about the meaning of the GENERIC_* bits is missing completely.

The constant for the MAXIMUM_ALLOWED permission is also missing.

Fix and extend description of all SMB permission constants to match the
reality, how the reference Windows SMB / NTFS implementation handles them.

Links to official Microsoft documentation related to permissions:
https://learn.microsoft.com/en-us/windows/win32/fileio/file-access-rights-constants
https://learn.microsoft.com/en-us/windows/win32/secauthz/access-mask
https://learn.microsoft.com/en-us/windows/win32/secauthz/standard-access-rights
https://learn.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights
https://learn.microsoft.com/en-us/windows/win32/api/winternl/nf-winternl-ntcreatefile
https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/nf-ntifs-ntcreatefile

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifspdu.h | 82 ++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 21 deletions(-)

diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index c285f0e5ce0f0..84743f3d7c512 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -190,42 +190,82 @@
  */
 
 #define FILE_READ_DATA        0x00000001  /* Data can be read from the file   */
+					  /* or directory child entries can   */
+					  /* be listed together with the      */
+					  /* associated child attributes      */
+					  /* (so the FILE_READ_ATTRIBUTES on  */
+					  /* the child entry is not needed)   */
 #define FILE_WRITE_DATA       0x00000002  /* Data can be written to the file  */
+					  /* or new file can be created in    */
+					  /* the directory                    */
 #define FILE_APPEND_DATA      0x00000004  /* Data can be appended to the file */
+					  /* (for non-local files over SMB it */
+					  /* is same as FILE_WRITE_DATA)      */
+					  /* or new subdirectory can be       */
+					  /* created in the directory         */
 #define FILE_READ_EA          0x00000008  /* Extended attributes associated   */
 					  /* with the file can be read        */
 #define FILE_WRITE_EA         0x00000010  /* Extended attributes associated   */
 					  /* with the file can be written     */
 #define FILE_EXECUTE          0x00000020  /*Data can be read into memory from */
 					  /* the file using system paging I/O */
-#define FILE_DELETE_CHILD     0x00000040
+					  /* for executing the file / script  */
+					  /* or right to traverse directory   */
+					  /* (but by default all users have   */
+					  /* directory bypass traverse        */
+					  /* privilege and do not need this   */
+					  /* permission on directories at all)*/
+#define FILE_DELETE_CHILD     0x00000040  /* Child entry can be deleted from  */
+					  /* the directory (so the DELETE on  */
+					  /* the child entry is not needed)   */
 #define FILE_READ_ATTRIBUTES  0x00000080  /* Attributes associated with the   */
-					  /* file can be read                 */
+					  /* file or directory can be read    */
 #define FILE_WRITE_ATTRIBUTES 0x00000100  /* Attributes associated with the   */
-					  /* file can be written              */
-#define DELETE                0x00010000  /* The file can be deleted          */
-#define READ_CONTROL          0x00020000  /* The access control list and      */
-					  /* ownership associated with the    */
-					  /* file can be read                 */
-#define WRITE_DAC             0x00040000  /* The access control list and      */
-					  /* ownership associated with the    */
-					  /* file can be written.             */
+					  /* file or directory can be written */
+#define DELETE                0x00010000  /* The file or dir can be deleted   */
+#define READ_CONTROL          0x00020000  /* The discretionary access control */
+					  /* list and ownership associated    */
+					  /* with the file or dir can be read */
+#define WRITE_DAC             0x00040000  /* The discretionary access control */
+					  /* list associated with the file or */
+					  /* directory can be written         */
 #define WRITE_OWNER           0x00080000  /* Ownership information associated */
-					  /* with the file can be written     */
+					  /* with the file/dir can be written */
 #define SYNCHRONIZE           0x00100000  /* The file handle can waited on to */
 					  /* synchronize with the completion  */
 					  /* of an input/output request       */
 #define SYSTEM_SECURITY       0x01000000  /* The system access control list   */
-					  /* can be read and changed          */
-#define GENERIC_ALL           0x10000000
-#define GENERIC_EXECUTE       0x20000000
-#define GENERIC_WRITE         0x40000000
-#define GENERIC_READ          0x80000000
-					 /* In summary - Relevant file       */
-					 /* access flags from CIFS are       */
-					 /* file_read_data, file_write_data  */
-					 /* file_execute, file_read_attributes*/
-					 /* write_dac, and delete.           */
+					  /* associated with the file or      */
+					  /* directory can be read or written */
+					  /* (cannot be in DACL, can in SACL) */
+#define MAXIMUM_ALLOWED       0x02000000  /* Maximal subset of GENERIC_ALL    */
+					  /* permissions which can be granted */
+					  /* (cannot be in DACL nor SACL)     */
+#define GENERIC_ALL           0x10000000  /* Same as: GENERIC_EXECUTE |       */
+					  /*          GENERIC_WRITE |         */
+					  /*          GENERIC_READ |          */
+					  /*          FILE_DELETE_CHILD |     */
+					  /*          DELETE |                */
+					  /*          WRITE_DAC |             */
+					  /*          WRITE_OWNER             */
+					  /* So GENERIC_ALL contains all bits */
+					  /* mentioned above except these two */
+					  /* SYSTEM_SECURITY  MAXIMUM_ALLOWED */
+#define GENERIC_EXECUTE       0x20000000  /* Same as: FILE_EXECUTE |          */
+					  /*          FILE_READ_ATTRIBUTES |  */
+					  /*          READ_CONTROL |          */
+					  /*          SYNCHRONIZE             */
+#define GENERIC_WRITE         0x40000000  /* Same as: FILE_WRITE_DATA |       */
+					  /*          FILE_APPEND_DATA |      */
+					  /*          FILE_WRITE_EA |         */
+					  /*          FILE_WRITE_ATTRIBUTES | */
+					  /*          READ_CONTROL |          */
+					  /*          SYNCHRONIZE             */
+#define GENERIC_READ          0x80000000  /* Same as: FILE_READ_DATA |        */
+					  /*          FILE_READ_EA |          */
+					  /*          FILE_READ_ATTRIBUTES |  */
+					  /*          READ_CONTROL |          */
+					  /*          SYNCHRONIZE             */
 
 #define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
 #define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \

From 65c49767dd4fc058673f9259fda1772fd398eaa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Thu, 26 Dec 2024 14:50:38 +0100
Subject: [PATCH 269/368] cifs: Remove symlink member from cifs_open_info_data
 union
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Member 'symlink' is part of the union in struct cifs_open_info_data. Its
value is assigned in a few places, but is always read through another
union member, 'reparse_point'. So to make the code more readable, always
use only the 'reparse_point' member and drop the whole union. No functional
change.

Signed-off-by: Pali Rohár <pali@kernel.org>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsglob.h | 5 +----
 fs/smb/client/inode.c    | 2 +-
 fs/smb/client/smb1ops.c  | 4 ++--
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 63f23a845e15d..2266b5b9a19fb 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -215,10 +215,7 @@ struct cifs_cred {
 
 struct cifs_open_info_data {
 	bool adjust_tz;
-	union {
-		bool reparse_point;
-		bool symlink;
-	};
+	bool reparse_point;
 	struct {
 		/* ioctl response buffer */
 		struct {
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 93e9188b2632f..8896c88320c8d 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -990,7 +990,7 @@ cifs_get_file_info(struct file *filp)
 		/* TODO: add support to query reparse tag */
 		data.adjust_tz = false;
 		if (data.symlink_target) {
-			data.symlink = true;
+			data.reparse_point = true;
 			data.reparse.tag = IO_REPARSE_TAG_SYMLINK;
 		}
 		path = build_path_from_dentry(dentry, page);
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 55014c22f0828..236289ff14edd 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -551,7 +551,7 @@ static int cifs_query_path_info(const unsigned int xid,
 	int rc;
 	FILE_ALL_INFO fi = {};
 
-	data->symlink = false;
+	data->reparse_point = false;
 	data->adjust_tz = false;
 
 	/* could do find first instead but this returns more info */
@@ -592,7 +592,7 @@ static int cifs_query_path_info(const unsigned int xid,
 		/* Need to check if this is a symbolic link or not */
 		tmprc = CIFS_open(xid, &oparms, &oplock, NULL);
 		if (tmprc == -EOPNOTSUPP)
-			data->symlink = true;
+			data->reparse_point = true;
 		else if (tmprc == 0)
 			CIFSSMBClose(xid, tcon, fid.netfid);
 	}

From 32ba03042ab2618f2622e4dae57ca802ac982e39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Thu, 26 Dec 2024 14:56:33 +0100
Subject: [PATCH 270/368] cifs: Simplify reparse point check in
 cifs_query_path_info() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For checking if path is reparse point and setting data->reparse_point
member, it is enough to check if ATTR_REPARSE is present.

It is not required to call CIFS_open() without OPEN_REPARSE_POINT and
check for the -EOPNOTSUPP error code.

Signed-off-by: Pali Rohár <pali@kernel.org>
Acked-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/smb1ops.c | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 236289ff14edd..9756b876a75e1 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -569,32 +569,8 @@ static int cifs_query_path_info(const unsigned int xid,
 	}
 
 	if (!rc) {
-		int tmprc;
-		int oplock = 0;
-		struct cifs_fid fid;
-		struct cifs_open_parms oparms;
-
 		move_cifs_info_to_smb2(&data->fi, &fi);
-
-		if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE))
-			return 0;
-
-		oparms = (struct cifs_open_parms) {
-			.tcon = tcon,
-			.cifs_sb = cifs_sb,
-			.desired_access = FILE_READ_ATTRIBUTES,
-			.create_options = cifs_create_options(cifs_sb, 0),
-			.disposition = FILE_OPEN,
-			.path = full_path,
-			.fid = &fid,
-		};
-
-		/* Need to check if this is a symbolic link or not */
-		tmprc = CIFS_open(xid, &oparms, &oplock, NULL);
-		if (tmprc == -EOPNOTSUPP)
-			data->reparse_point = true;
-		else if (tmprc == 0)
-			CIFSSMBClose(xid, tcon, fid.netfid);
+		data->reparse_point = le32_to_cpu(fi.Attributes) & ATTR_REPARSE;
 	}
 
 	return rc;

From 12b466eb52d926802b6898d2cb7e67386467f54a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 23 Sep 2024 23:01:59 +0200
Subject: [PATCH 271/368] cifs: Fix creating and resolving absolute NT-style
 symlinks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the SMB symlink is stored on NT server in absolute form then it points
to the NT object hierarchy, which is different from POSIX one and needs
some conversion / mapping.

To improve interoperability with Windows SMB servers and the WSL subsystem,
reuse the WSL logic of mapping between NT paths and POSIX paths in the
Linux SMB client.

The WSL subsystem on Windows uses the -o symlinkroot= mount option for
-t drvfs mounts. It specifies the POSIX path under which the lowercase
Windows drive letters (without colon) are expected to be mounted.

Do the same for the Linux SMB client and add a new mount option
-o symlinkroot= which mimics the drvfs mount option of the same name. It
specifies where in the Linux VFS hierarchy the root of the DOS / Windows
drive letters is located, and is used to translate between absolute
NT-style symlinks and absolute Linux VFS symlinks. The default value of
symlinkroot is "/mnt", the same as WSL uses.

Note that DOS / Windows drive letter symlinks are just a subset of all
possible NT-style symlinks. Drive letters live in the NT subtree \??\ and
important details about NT paths and the object hierarchy are in the
comments in this change.

When symlink target location from non-POSIX SMB server is in absolute form
(indicated by absence of SYMLINK_FLAG_RELATIVE) then it is converted to
Linux absolute symlink according to symlinkroot configuration.

And when creating a new symlink on non-POSIX SMB server in absolute form
then Linux absolute target is converted to NT-style according to
symlinkroot configuration.

When the SMB server is POSIX, this change affects neither reading the
target location of a symlink nor creating a new symlink. It is expected
that a POSIX SMB server works with POSIX paths where the absolute root
is /.

This change improves interoperability of absolute SMB symlinks with Windows
SMB servers.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/fs_context.c |  22 +++
 fs/smb/client/fs_context.h |   2 +
 fs/smb/client/reparse.c    | 267 ++++++++++++++++++++++++++++++++++---
 3 files changed, 273 insertions(+), 18 deletions(-)

diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 5381f05420bc2..d7d2f6c607b52 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -185,6 +185,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
 	fsparam_string("cache", Opt_cache),
 	fsparam_string("reparse", Opt_reparse),
 	fsparam_string("upcall_target", Opt_upcalltarget),
+	fsparam_string("symlinkroot", Opt_symlinkroot),
 
 	/* Arguments that should be ignored */
 	fsparam_flag("guest", Opt_ignore),
@@ -386,6 +387,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
 	new_ctx->iocharset = NULL;
 	new_ctx->leaf_fullpath = NULL;
 	new_ctx->dns_dom = NULL;
+	new_ctx->symlinkroot = NULL;
 	/*
 	 * Make sure to stay in sync with smb3_cleanup_fs_context_contents()
 	 */
@@ -401,6 +403,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
 	DUP_CTX_STR(iocharset);
 	DUP_CTX_STR(leaf_fullpath);
 	DUP_CTX_STR(dns_dom);
+	DUP_CTX_STR(symlinkroot);
 
 	return 0;
 }
@@ -1727,6 +1730,16 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		if (parse_reparse_flavor(fc, param->string, ctx))
 			goto cifs_parse_mount_err;
 		break;
+	case Opt_symlinkroot:
+		if (param->string[0] != '/') {
+			cifs_errorf(fc, "symlinkroot mount options must be absolute path\n");
+			goto cifs_parse_mount_err;
+		}
+		kfree(ctx->symlinkroot);
+		ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL);
+		if (!ctx->symlinkroot)
+			goto cifs_parse_mount_err;
+		break;
 	}
 	/* case Opt_ignore: - is ignored as expected ... */
 
@@ -1735,6 +1748,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		goto cifs_parse_mount_err;
 	}
 
+	/*
+	 * By default resolve all native absolute symlinks relative to "/mnt/".
+	 * The drvfs driver in WSL uses the same default when resolving SMB shares.
+	 */
+	if (!ctx->symlinkroot)
+		ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL);
+
 	return 0;
 
  cifs_parse_mount_err:
@@ -1867,6 +1887,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
 	ctx->leaf_fullpath = NULL;
 	kfree(ctx->dns_dom);
 	ctx->dns_dom = NULL;
+	kfree(ctx->symlinkroot);
+	ctx->symlinkroot = NULL;
 }
 
 void
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 8813533345ee7..43bc3119af218 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -166,6 +166,7 @@ enum cifs_param {
 	Opt_cache,
 	Opt_reparse,
 	Opt_upcalltarget,
+	Opt_symlinkroot,
 
 	/* Mount options to be ignored */
 	Opt_ignore,
@@ -296,6 +297,7 @@ struct smb3_fs_context {
 	enum cifs_reparse_type reparse_type;
 	bool dfs_conn:1; /* set for dfs mounts */
 	char *dns_dom;
+	char *symlinkroot; /* top level directory for native SMB symlinks in absolute format */
 };
 
 extern const struct fs_parameter_spec smb3_fs_parameters[];
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 3be2173a026d0..344371dd895cb 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -25,36 +25,131 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode,
 				const char *full_path, const char *symname)
 {
 	struct reparse_symlink_data_buffer *buf = NULL;
-	struct cifs_open_info_data data;
+	struct cifs_open_info_data data = {};
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct inode *new;
 	struct kvec iov;
-	__le16 *path;
+	__le16 *path = NULL;
 	bool directory;
-	char *sym, sep = CIFS_DIR_SEP(cifs_sb);
-	u16 len, plen;
+	char *symlink_target = NULL;
+	char *sym = NULL;
+	char sep = CIFS_DIR_SEP(cifs_sb);
+	u16 len, plen, poff, slen;
 	int rc = 0;
 
 	if (strlen(symname) > REPARSE_SYM_PATH_MAX)
 		return -ENAMETOOLONG;
 
-	sym = kstrdup(symname, GFP_KERNEL);
-	if (!sym)
-		return -ENOMEM;
+	symlink_target = kstrdup(symname, GFP_KERNEL);
+	if (!symlink_target) {
+		rc = -ENOMEM;
+		goto out;
+	}
 
 	data = (struct cifs_open_info_data) {
 		.reparse_point = true,
 		.reparse = { .tag = IO_REPARSE_TAG_SYMLINK, },
-		.symlink_target = sym,
+		.symlink_target = symlink_target,
 	};
 
-	convert_delimiter(sym, sep);
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+		/*
+		 * This is a request to create an absolute symlink on the server
+		 * which does not support POSIX paths, and expects symlink in
+		 * NT-style path. So convert absolute Linux symlink target path
+		 * to the absolute NT-style path. Root of the NT-style path for
+		 * symlinks is specified in "symlinkroot" mount option. This will
+		 * ensure compatibility of this symlink stored in absolute form
+		 * on the SMB server.
+		 */
+		if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) {
+			/*
+			 * If the absolute Linux symlink target path is not
+			 * inside "symlinkroot" location then there is no way
+			 * to convert such Linux symlink to NT-style path.
+			 */
+			cifs_dbg(VFS,
+				 "absolute symlink '%s' cannot be converted to NT format "
+				 "because it is outside of symlinkroot='%s'\n",
+				 symname, cifs_sb->ctx->symlinkroot);
+			rc = -EINVAL;
+			goto out;
+		}
+		len = strlen(cifs_sb->ctx->symlinkroot);
+		if (cifs_sb->ctx->symlinkroot[len-1] != '/')
+			len++;
+		if (symname[len] >= 'a' && symname[len] <= 'z' &&
+		    (symname[len+1] == '/' || symname[len+1] == '\0')) {
+			/*
+			 * Symlink points to Linux target /symlinkroot/x/path/...
+			 * where 'x' is the lowercase local Windows drive.
+			 * NT-style path for 'x' has common form \??\X:\path\...
+			 * with uppercase local Windows drive.
+			 */
+			int common_path_len = strlen(symname+len+1)+1;
+			sym = kzalloc(6+common_path_len, GFP_KERNEL);
+			if (!sym) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			memcpy(sym, "\\??\\", 4);
+			sym[4] = symname[len] - ('a'-'A');
+			sym[5] = ':';
+			memcpy(sym+6, symname+len+1, common_path_len);
+		} else {
+			/* Unhandled absolute symlink. Report an error. */
+			cifs_dbg(
+				 VFS,
+				 "absolute symlink '%s' cannot be converted to NT format "
+				 "because it points to unknown target\n",
+				 symname);
+			rc = -EINVAL;
+			goto out;
+		}
+	} else {
+		/*
+		 * This is a request to either create an absolute symlink on a
+		 * server which expects POSIX paths, or it is a request to
+		 * create a relative symlink from the current directory.
+		 * These paths have same format as relative SMB symlinks,
+		 * so no conversion is needed. So just take symname as-is.
+		 */
+		sym = kstrdup(symname, GFP_KERNEL);
+		if (!sym) {
+			rc = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (sep == '\\')
+		convert_delimiter(sym, sep);
+
+	/*
+	 * For absolute NT symlinks it is required to pass also leading
+	 * backslash and to not mangle NT object prefix "\\??\\" and not to
+	 * mangle colon in drive letter. But cifs_convert_path_to_utf16()
+	 * removes leading backslash and replaces '?' and ':'. So temporary
+	 * mask these characters in NT object prefix by '_' and then change
+	 * them back.
+	 */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/')
+		sym[0] = sym[1] = sym[2] = sym[5] = '_';
+
 	path = cifs_convert_path_to_utf16(sym, cifs_sb);
 	if (!path) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+		sym[0] = '\\';
+		sym[1] = sym[2] = '?';
+		sym[5] = ':';
+		path[0] = cpu_to_le16('\\');
+		path[1] = path[2] = cpu_to_le16('?');
+		path[5] = cpu_to_le16(':');
+	}
+
 	/*
 	 * SMB distinguish between symlink to directory and symlink to file.
 	 * They cannot be exchanged (symlink of file type which points to
@@ -67,8 +162,18 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode,
 	if (rc < 0)
 		goto out;
 
-	plen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX);
-	len = sizeof(*buf) + plen * 2;
+	slen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX);
+	poff = 0;
+	plen = slen;
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+		/*
+		 * For absolute NT symlinks skip leading "\\??\\" in PrintName as
+		 * PrintName is user visible location in DOS/Win32 format (not in NT format).
+		 */
+		poff = 4;
+		plen -= 2 * poff;
+	}
+	len = sizeof(*buf) + plen + slen;
 	buf = kzalloc(len, GFP_KERNEL);
 	if (!buf) {
 		rc = -ENOMEM;
@@ -77,17 +182,17 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode,
 
 	buf->ReparseTag = cpu_to_le32(IO_REPARSE_TAG_SYMLINK);
 	buf->ReparseDataLength = cpu_to_le16(len - sizeof(struct reparse_data_buffer));
+
 	buf->SubstituteNameOffset = cpu_to_le16(plen);
-	buf->SubstituteNameLength = cpu_to_le16(plen);
-	memcpy(&buf->PathBuffer[plen], path, plen);
+	buf->SubstituteNameLength = cpu_to_le16(slen);
+	memcpy(&buf->PathBuffer[plen], path, slen);
+
 	buf->PrintNameOffset = 0;
 	buf->PrintNameLength = cpu_to_le16(plen);
-	memcpy(buf->PathBuffer, path, plen);
+	memcpy(buf->PathBuffer, path+poff, plen);
+
 	buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0);
-	if (*sym != sep)
-		buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE);
 
-	convert_delimiter(sym, '/');
 	iov.iov_base = buf;
 	iov.iov_len = len;
 	new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
@@ -98,6 +203,7 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode,
 	else
 		rc = PTR_ERR(new);
 out:
+	kfree(sym);
 	kfree(path);
 	cifs_free_open_info(&data);
 	kfree(buf);
@@ -543,6 +649,9 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 	char sep = CIFS_DIR_SEP(cifs_sb);
 	char *linux_target = NULL;
 	char *smb_target = NULL;
+	int symlinkroot_len;
+	int abs_path_len;
+	char *abs_path;
 	int levels;
 	int rc;
 	int i;
@@ -570,7 +679,123 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 		goto out;
 	}
 
-	if (smb_target[0] == sep && relative) {
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) {
+		/*
+		 * This is an absolute symlink from the server which does not
+		 * support POSIX paths, so the symlink is in NT-style path.
+		 * So convert it to absolute Linux symlink target path. Root of
+		 * the NT-style path for symlinks is specified in "symlinkroot"
+		 * mount option.
+		 *
+		 * Root of the DOS and Win32 paths is at NT path \??\
+		 * It means that DOS/Win32 path C:\folder\file.txt is
+		 * NT path \??\C:\folder\file.txt
+		 *
+		 * NT systems have some well-known object symlinks in their NT
+		 * hierarchy, which is needed to take into account when resolving
+		 * other symlinks. Most commonly used symlink paths are:
+		 * \?? -> \GLOBAL??
+		 * \DosDevices -> \??
+		 * \GLOBAL??\GLOBALROOT -> \
+		 * \GLOBAL??\Global -> \GLOBAL??
+		 * \GLOBAL??\NUL -> \Device\Null
+		 * \GLOBAL??\UNC -> \Device\Mup
+		 * \GLOBAL??\PhysicalDrive0 -> \Device\Harddisk0\DR0 (for each harddisk)
+		 * \GLOBAL??\A: -> \Device\Floppy0 (if A: is the first floppy)
+		 * \GLOBAL??\C: -> \Device\HarddiskVolume1 (if C: is the first harddisk)
+		 * \GLOBAL??\D: -> \Device\CdRom0 (if D: is first cdrom)
+		 * \SystemRoot -> \Device\Harddisk0\Partition1\WINDOWS (or where is NT system installed)
+		 * \Volume{...} -> \Device\HarddiskVolume1 (where ... is system generated guid)
+		 *
+		 * In the most common cases, absolute NT symlinks point to a path on a
+		 * DOS/Win32 drive letter, a system-specific Volume or a UNC share.
+		 * Here are few examples of commonly used absolute NT symlinks
+		 * created by mklink.exe tool:
+		 * \??\C:\folder\file.txt
+		 * \??\\C:\folder\file.txt
+		 * \??\UNC\server\share\file.txt
+		 * \??\\UNC\server\share\file.txt
+		 * \??\Volume{b75e2c83-0000-0000-0000-602f00000000}\folder\file.txt
+		 *
+		 * It means that the most common path prefix \??\ is also NT path
+		 * symlink (to \GLOBAL??). It is less common that second path
+		 * separator is double backslash, but it is valid.
+		 *
+		 * Volume guid is randomly generated by the target system and so
+		 * only the target system knows the mapping between guid and the
+		 * harddisk number. Over SMB it is not possible to resolve this
+		 * mapping, therefore symlinks pointing to target location of
+		 * volume guids are totally unusable over SMB.
+		 *
+		 * For now parse only symlink paths available for DOS and Win32.
+		 * Those are paths with \??\ prefix or paths which points to \??\
+		 * via other NT symlink (\DosDevices\, \GLOBAL??\, ...).
+		 */
+		abs_path = smb_target;
+globalroot:
+		if (strstarts(abs_path, "\\??\\"))
+			abs_path += sizeof("\\??\\")-1;
+		else if (strstarts(abs_path, "\\DosDevices\\"))
+			abs_path += sizeof("\\DosDevices\\")-1;
+		else if (strstarts(abs_path, "\\GLOBAL??\\"))
+			abs_path += sizeof("\\GLOBAL??\\")-1;
+		else {
+			/* Unhandled absolute symlink, points outside of DOS/Win32 */
+			cifs_dbg(VFS,
+				 "absolute symlink '%s' cannot be converted from NT format "
+				 "because points to unknown target\n",
+				 smb_target);
+			rc = -EIO;
+			goto out;
+		}
+
+		/* Sometimes path separator after \?? is double backslash */
+		if (abs_path[0] == '\\')
+			abs_path++;
+
+		while (strstarts(abs_path, "Global\\"))
+			abs_path += sizeof("Global\\")-1;
+
+		if (strstarts(abs_path, "GLOBALROOT\\")) {
+			/* Label globalroot requires path with leading '\\', so do not trim '\\' */
+			abs_path += sizeof("GLOBALROOT")-1;
+			goto globalroot;
+		}
+
+		/* For now parse only paths to drive letters */
+		if (((abs_path[0] >= 'A' && abs_path[0] <= 'Z') ||
+		     (abs_path[0] >= 'a' && abs_path[0] <= 'z')) &&
+		    abs_path[1] == ':' &&
+		    (abs_path[2] == '\\' || abs_path[2] == '\0')) {
+			/* Convert drive letter to lowercase and drop colon */
+			char drive_letter = abs_path[0];
+			if (drive_letter >= 'A' && drive_letter <= 'Z')
+				drive_letter += 'a'-'A';
+			abs_path++;
+			abs_path[0] = drive_letter;
+		} else {
+			/* Unhandled absolute symlink. Report an error. */
+			cifs_dbg(VFS,
+				 "absolute symlink '%s' cannot be converted from NT format "
+				 "because points to unknown target\n",
+				 smb_target);
+			rc = -EIO;
+			goto out;
+		}
+
+		abs_path_len = strlen(abs_path)+1;
+		symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot);
+		if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/')
+			symlinkroot_len--;
+		linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL);
+		if (!linux_target) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len);
+		linux_target[symlinkroot_len] = '/';
+		memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len);
+	} else if (smb_target[0] == sep && relative) {
 		/*
 		 * This is a relative SMB symlink from the top of the share,
 		 * which is the top level directory of the Linux mount point.
@@ -599,6 +824,12 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 		}
 		memcpy(linux_target + levels*3, smb_target+1, smb_target_len); /* +1 to skip leading sep */
 	} else {
+		/*
+		 * This is either an absolute symlink in POSIX-style format
+		 * or relative SMB symlink from the current directory.
+		 * These paths have same format as Linux symlinks, so no
+		 * conversion is needed.
+		 */
 		linux_target = smb_target;
 		smb_target = NULL;
 	}

From 660618dde2b4c372132a6be62f11ab68a0a1571a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Fri, 11 Oct 2024 11:20:56 +0200
Subject: [PATCH 272/368] cifs: Add mount option -o symlink= for choosing
 symlink create type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently Linux CIFS client creates a new symlink of the first flavor which
is allowed by mount options, parsed in this order: -o (no)mfsymlinks,
-o (no)sfu, -o (no)unix (+ its aliases) and -o reparse=[type].

Introduce a new mount option -o symlink= for explicitly choosing a symlink
flavor. Possible options are:

  -o symlink=default    - The default behavior, like before this change.
  -o symlink=none       - Disallow creating new symlinks
  -o symlink=native     - Create as native SMB symlink reparse point
  -o symlink=unix       - Create via SMB1 unix extension command
  -o symlink=mfsymlinks - Create as regular file of mfsymlinks format
  -o symlink=sfu        - Create as regular system file of SFU format
  -o symlink=nfs        - Create as NFS reparse point
  -o symlink=wsl        - Create as WSL reparse point

So for example specifying -o sfu,mfsymlinks,symlink=native will also allow
parsing symlinks of the SFU and mfsymlinks types (which are disabled by
default unless the mount option is explicitly specified), but new symlinks
will be created as the native SMB type (parsing of which is always enabled).

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsfs.c     |  2 ++
 fs/smb/client/cifsglob.h   | 33 ++++++++++++++++++
 fs/smb/client/connect.c    |  2 ++
 fs/smb/client/fs_context.c | 71 ++++++++++++++++++++++++++++++++++++++
 fs/smb/client/fs_context.h | 16 +++++++++
 fs/smb/client/link.c       | 60 ++++++++++++++++++++++++--------
 fs/smb/client/reparse.c    | 52 ++++++++++++++++++++++------
 fs/smb/client/reparse.h    |  2 ++
 8 files changed, 214 insertions(+), 24 deletions(-)

diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index b800c9f585d8d..f2c852c9d6a11 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -715,6 +715,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 					    cifs_sb->ctx->backupgid));
 	seq_show_option(s, "reparse",
 			cifs_reparse_type_str(cifs_sb->ctx->reparse_type));
+	seq_show_option(s, "symlink",
+			cifs_symlink_type_str(get_cifs_symlink_type(cifs_sb)));
 
 	seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize);
 	seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize);
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 2266b5b9a19fb..9a96f69e67d04 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -177,6 +177,39 @@ static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type)
 	}
 }
 
+enum cifs_symlink_type {
+	CIFS_SYMLINK_TYPE_DEFAULT,
+	CIFS_SYMLINK_TYPE_NONE,
+	CIFS_SYMLINK_TYPE_NATIVE,
+	CIFS_SYMLINK_TYPE_UNIX,
+	CIFS_SYMLINK_TYPE_MFSYMLINKS,
+	CIFS_SYMLINK_TYPE_SFU,
+	CIFS_SYMLINK_TYPE_NFS,
+	CIFS_SYMLINK_TYPE_WSL,
+};
+
+static inline const char *cifs_symlink_type_str(enum cifs_symlink_type type)
+{
+	switch (type) {
+	case CIFS_SYMLINK_TYPE_NONE:
+		return "none";
+	case CIFS_SYMLINK_TYPE_NATIVE:
+		return "native";
+	case CIFS_SYMLINK_TYPE_UNIX:
+		return "unix";
+	case CIFS_SYMLINK_TYPE_MFSYMLINKS:
+		return "mfsymlinks";
+	case CIFS_SYMLINK_TYPE_SFU:
+		return "sfu";
+	case CIFS_SYMLINK_TYPE_NFS:
+		return "nfs";
+	case CIFS_SYMLINK_TYPE_WSL:
+		return "wsl";
+	default:
+		return "unknown";
+	}
+}
+
 struct session_key {
 	unsigned int len;
 	char *response;
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 880d7cf8b730d..ebd20f48f6aac 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2849,6 +2849,8 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
 		return 0;
 	if (old->ctx->reparse_type != new->ctx->reparse_type)
 		return 0;
+	if (old->ctx->symlink_type != new->ctx->symlink_type)
+		return 0;
 
 	return 1;
 }
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index d7d2f6c607b52..5a9a5e04fb051 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -185,6 +185,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
 	fsparam_string("cache", Opt_cache),
 	fsparam_string("reparse", Opt_reparse),
 	fsparam_string("upcall_target", Opt_upcalltarget),
+	fsparam_string("symlink", Opt_symlink),
 	fsparam_string("symlinkroot", Opt_symlinkroot),
 
 	/* Arguments that should be ignored */
@@ -360,6 +361,55 @@ static int parse_reparse_flavor(struct fs_context *fc, char *value,
 	return 0;
 }
 
+static const match_table_t symlink_flavor_tokens = {
+	{ Opt_symlink_default,		"default" },
+	{ Opt_symlink_none,		"none" },
+	{ Opt_symlink_native,		"native" },
+	{ Opt_symlink_unix,		"unix" },
+	{ Opt_symlink_mfsymlinks,	"mfsymlinks" },
+	{ Opt_symlink_sfu,		"sfu" },
+	{ Opt_symlink_nfs,		"nfs" },
+	{ Opt_symlink_wsl,		"wsl" },
+	{ Opt_symlink_err,		NULL },
+};
+
+static int parse_symlink_flavor(struct fs_context *fc, char *value,
+				struct smb3_fs_context *ctx)
+{
+	substring_t args[MAX_OPT_ARGS];
+
+	switch (match_token(value, symlink_flavor_tokens, args)) {
+	case Opt_symlink_default:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT;
+		break;
+	case Opt_symlink_none:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_NONE;
+		break;
+	case Opt_symlink_native:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_NATIVE;
+		break;
+	case Opt_symlink_unix:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_UNIX;
+		break;
+	case Opt_symlink_mfsymlinks:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_MFSYMLINKS;
+		break;
+	case Opt_symlink_sfu:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_SFU;
+		break;
+	case Opt_symlink_nfs:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_NFS;
+		break;
+	case Opt_symlink_wsl:
+		ctx->symlink_type = CIFS_SYMLINK_TYPE_WSL;
+		break;
+	default:
+		cifs_errorf(fc, "bad symlink= option: %s\n", value);
+		return 1;
+	}
+	return 0;
+}
+
 #define DUP_CTX_STR(field)						\
 do {									\
 	if (ctx->field) {						\
@@ -1730,6 +1780,10 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		if (parse_reparse_flavor(fc, param->string, ctx))
 			goto cifs_parse_mount_err;
 		break;
+	case Opt_symlink:
+		if (parse_symlink_flavor(fc, param->string, ctx))
+			goto cifs_parse_mount_err;
+		break;
 	case Opt_symlinkroot:
 		if (param->string[0] != '/') {
 			cifs_errorf(fc, "symlinkroot mount options must be absolute path\n");
@@ -1765,6 +1819,22 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 	return -EINVAL;
 }
 
+enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb)
+{
+	if (cifs_sb->ctx->symlink_type == CIFS_SYMLINK_TYPE_DEFAULT) {
+		if (cifs_sb->ctx->mfsymlinks)
+			return CIFS_SYMLINK_TYPE_MFSYMLINKS;
+		else if (cifs_sb->ctx->sfu_emul)
+			return CIFS_SYMLINK_TYPE_SFU;
+		else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext)
+			return CIFS_SYMLINK_TYPE_UNIX;
+		else
+			return CIFS_SYMLINK_TYPE_NATIVE;
+	} else {
+		return cifs_sb->ctx->symlink_type;
+	}
+}
+
 int smb3_init_fs_context(struct fs_context *fc)
 {
 	struct smb3_fs_context *ctx;
@@ -1841,6 +1911,7 @@ int smb3_init_fs_context(struct fs_context *fc)
 
 	ctx->retrans = 1;
 	ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT;
+	ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT;
 
 /*
  *	short int override_uid = -1;
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 43bc3119af218..204643428068c 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -48,6 +48,18 @@ enum cifs_reparse_parm {
 	Opt_reparse_err
 };
 
+enum cifs_symlink_parm {
+	Opt_symlink_default,
+	Opt_symlink_none,
+	Opt_symlink_native,
+	Opt_symlink_unix,
+	Opt_symlink_mfsymlinks,
+	Opt_symlink_sfu,
+	Opt_symlink_nfs,
+	Opt_symlink_wsl,
+	Opt_symlink_err
+};
+
 enum cifs_sec_param {
 	Opt_sec_krb5,
 	Opt_sec_krb5i,
@@ -166,6 +178,7 @@ enum cifs_param {
 	Opt_cache,
 	Opt_reparse,
 	Opt_upcalltarget,
+	Opt_symlink,
 	Opt_symlinkroot,
 
 	/* Mount options to be ignored */
@@ -295,6 +308,7 @@ struct smb3_fs_context {
 	struct cifs_ses *dfs_root_ses;
 	bool dfs_automount:1; /* set for dfs automount only */
 	enum cifs_reparse_type reparse_type;
+	enum cifs_symlink_type symlink_type;
 	bool dfs_conn:1; /* set for dfs mounts */
 	char *dns_dom;
 	char *symlinkroot; /* top level directory for native SMB symlinks in absolute format */
@@ -302,6 +316,8 @@ struct smb3_fs_context {
 
 extern const struct fs_parameter_spec smb3_fs_parameters[];
 
+extern enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb);
+
 extern int smb3_init_fs_context(struct fs_context *fc);
 extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx);
 extern void smb3_cleanup_fs_context(struct smb3_fs_context *ctx);
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index 47ddeb7fa1116..6e6c09cc5ce7a 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -18,6 +18,7 @@
 #include "cifs_unicode.h"
 #include "smb2proto.h"
 #include "cifs_ioctl.h"
+#include "fs_context.h"
 
 /*
  * M-F Symlink Functions - Begin
@@ -604,22 +605,53 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
 	cifs_dbg(FYI, "symname is %s\n", symname);
 
 	/* BB what if DFS and this volume is on different share? BB */
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
-		rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname);
-	} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
-		rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon,
-					  full_path, S_IFLNK, 0, symname);
+	rc = -EOPNOTSUPP;
+	switch (get_cifs_symlink_type(cifs_sb)) {
+	case CIFS_SYMLINK_TYPE_DEFAULT:
+		/* should not happen, get_cifs_symlink_type() resolves the default */
+		break;
+
+	case CIFS_SYMLINK_TYPE_NONE:
+		break;
+
+	case CIFS_SYMLINK_TYPE_UNIX:
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
-	} else if (pTcon->unix_ext) {
-		rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname,
-					   cifs_sb->local_nls,
-					   cifs_remap(cifs_sb));
+		if (pTcon->unix_ext) {
+			rc = CIFSUnixCreateSymLink(xid, pTcon, full_path,
+						   symname,
+						   cifs_sb->local_nls,
+						   cifs_remap(cifs_sb));
+		}
 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
-	} else if (server->ops->create_reparse_symlink) {
-		rc =  server->ops->create_reparse_symlink(xid, inode, direntry,
-							  pTcon, full_path,
-							  symname);
-		goto symlink_exit;
+		break;
+
+	case CIFS_SYMLINK_TYPE_MFSYMLINKS:
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
+			rc = create_mf_symlink(xid, pTcon, cifs_sb,
+					       full_path, symname);
+		}
+		break;
+
+	case CIFS_SYMLINK_TYPE_SFU:
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+			rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon,
+						  full_path, S_IFLNK,
+						  0, symname);
+		}
+		break;
+
+	case CIFS_SYMLINK_TYPE_NATIVE:
+	case CIFS_SYMLINK_TYPE_NFS:
+	case CIFS_SYMLINK_TYPE_WSL:
+		if (server->ops->create_reparse_symlink) {
+			rc = server->ops->create_reparse_symlink(xid, inode,
+								 direntry,
+								 pTcon,
+								 full_path,
+								 symname);
+			goto symlink_exit;
+		}
+		break;
 	}
 
 	if (rc == 0) {
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 344371dd895cb..24a5f563df26a 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -14,6 +14,20 @@
 #include "fs_context.h"
 #include "reparse.h"
 
+static int mknod_nfs(unsigned int xid, struct inode *inode,
+		     struct dentry *dentry, struct cifs_tcon *tcon,
+		     const char *full_path, umode_t mode, dev_t dev,
+		     const char *symname);
+
+static int mknod_wsl(unsigned int xid, struct inode *inode,
+		     struct dentry *dentry, struct cifs_tcon *tcon,
+		     const char *full_path, umode_t mode, dev_t dev,
+		     const char *symname);
+
+static int create_native_symlink(const unsigned int xid, struct inode *inode,
+				 struct dentry *dentry, struct cifs_tcon *tcon,
+				 const char *full_path, const char *symname);
+
 static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb,
 					   const unsigned int xid,
 					   const char *full_path,
@@ -23,6 +37,22 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb,
 int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode,
 				struct dentry *dentry, struct cifs_tcon *tcon,
 				const char *full_path, const char *symname)
+{
+	switch (get_cifs_symlink_type(CIFS_SB(inode->i_sb))) {
+	case CIFS_SYMLINK_TYPE_NATIVE:
+		return create_native_symlink(xid, inode, dentry, tcon, full_path, symname);
+	case CIFS_SYMLINK_TYPE_NFS:
+		return mknod_nfs(xid, inode, dentry, tcon, full_path, S_IFLNK, 0, symname);
+	case CIFS_SYMLINK_TYPE_WSL:
+		return mknod_wsl(xid, inode, dentry, tcon, full_path, S_IFLNK, 0, symname);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int create_native_symlink(const unsigned int xid, struct inode *inode,
+				 struct dentry *dentry, struct cifs_tcon *tcon,
+				 const char *full_path, const char *symname)
 {
 	struct reparse_symlink_data_buffer *buf = NULL;
 	struct cifs_open_info_data data = {};
@@ -366,6 +396,7 @@ static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 	case NFS_SPECFILE_SOCK:
 		dlen = 0;
 		break;
+	case NFS_SPECFILE_LNK: /* TODO: add support for NFS symlinks */
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -384,7 +415,8 @@ static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 
 static int mknod_nfs(unsigned int xid, struct inode *inode,
 		     struct dentry *dentry, struct cifs_tcon *tcon,
-		     const char *full_path, umode_t mode, dev_t dev)
+		     const char *full_path, umode_t mode, dev_t dev,
+		     const char *symname)
 {
 	struct cifs_open_info_data data;
 	struct reparse_nfs_data_buffer *p;
@@ -424,6 +456,7 @@ static int wsl_set_reparse_buf(struct reparse_data_buffer *buf,
 	case IO_REPARSE_TAG_LX_FIFO:
 	case IO_REPARSE_TAG_AF_UNIX:
 		break;
+	case IO_REPARSE_TAG_LX_SYMLINK: /* TODO: add support for WSL symlinks */
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -521,7 +554,8 @@ static int wsl_set_xattrs(struct inode *inode, umode_t _mode,
 
 static int mknod_wsl(unsigned int xid, struct inode *inode,
 		     struct dentry *dentry, struct cifs_tcon *tcon,
-		     const char *full_path, umode_t mode, dev_t dev)
+		     const char *full_path, umode_t mode, dev_t dev,
+		     const char *symname)
 {
 	struct cifs_open_info_data data;
 	struct reparse_data_buffer buf;
@@ -566,17 +600,15 @@ int smb2_mknod_reparse(unsigned int xid, struct inode *inode,
 		       const char *full_path, umode_t mode, dev_t dev)
 {
 	struct smb3_fs_context *ctx = CIFS_SB(inode->i_sb)->ctx;
-	int rc = -EOPNOTSUPP;
 
 	switch (ctx->reparse_type) {
 	case CIFS_REPARSE_TYPE_NFS:
-		rc = mknod_nfs(xid, inode, dentry, tcon, full_path, mode, dev);
-		break;
+		return mknod_nfs(xid, inode, dentry, tcon, full_path, mode, dev, NULL);
 	case CIFS_REPARSE_TYPE_WSL:
-		rc = mknod_wsl(xid, inode, dentry, tcon, full_path, mode, dev);
-		break;
+		return mknod_wsl(xid, inode, dentry, tcon, full_path, mode, dev, NULL);
+	default:
+		return -EOPNOTSUPP;
 	}
-	return rc;
 }
 
 /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
@@ -849,7 +881,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 	return rc;
 }
 
-static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym,
+static int parse_reparse_native_symlink(struct reparse_symlink_data_buffer *sym,
 				 u32 plen,
 				 struct cifs_sb_info *cifs_sb,
 				 const char *full_path,
@@ -936,7 +968,7 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
 		return parse_reparse_nfs((struct reparse_nfs_data_buffer *)buf,
 					   cifs_sb, data);
 	case IO_REPARSE_TAG_SYMLINK:
-		return parse_reparse_symlink(
+		return parse_reparse_native_symlink(
 			(struct reparse_symlink_data_buffer *)buf,
 			plen, cifs_sb, full_path, data);
 	case IO_REPARSE_TAG_LX_SYMLINK:
diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h
index ff05b0e75c928..5a753fec7e2c2 100644
--- a/fs/smb/client/reparse.h
+++ b/fs/smb/client/reparse.h
@@ -50,6 +50,7 @@ static inline kgid_t wsl_make_kgid(struct cifs_sb_info *cifs_sb,
 static inline u64 reparse_mode_nfs_type(mode_t mode)
 {
 	switch (mode & S_IFMT) {
+	case S_IFLNK: return NFS_SPECFILE_LNK;
 	case S_IFBLK: return NFS_SPECFILE_BLK;
 	case S_IFCHR: return NFS_SPECFILE_CHR;
 	case S_IFIFO: return NFS_SPECFILE_FIFO;
@@ -61,6 +62,7 @@ static inline u64 reparse_mode_nfs_type(mode_t mode)
 static inline u32 reparse_mode_wsl_tag(mode_t mode)
 {
 	switch (mode & S_IFMT) {
+	case S_IFLNK: return IO_REPARSE_TAG_LX_SYMLINK;
 	case S_IFBLK: return IO_REPARSE_TAG_LX_BLK;
 	case S_IFCHR: return IO_REPARSE_TAG_LX_CHR;
 	case S_IFIFO: return IO_REPARSE_TAG_LX_FIFO;

From 78f69467cbbfd24da5ce9917c4b738b38a615f8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sat, 21 Sep 2024 16:15:30 +0200
Subject: [PATCH 273/368] cifs: Add mount option -o reparse=none
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This new mount option allows completely disabling the creation of new
reparse points. When none of -o sfu, -o mfsymlinks or -o symlink= is
specified, creating any special file (fifo, socket, symlink, block and
char) will fail with an -EOPNOTSUPP error.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsglob.h   | 3 +++
 fs/smb/client/fs_context.c | 8 +++++++-
 fs/smb/client/fs_context.h | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 9a96f69e67d04..ee9754fad3e8a 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -160,6 +160,7 @@ enum upcall_target_enum {
 };
 
 enum cifs_reparse_type {
+	CIFS_REPARSE_TYPE_NONE,
 	CIFS_REPARSE_TYPE_NFS,
 	CIFS_REPARSE_TYPE_WSL,
 	CIFS_REPARSE_TYPE_DEFAULT = CIFS_REPARSE_TYPE_NFS,
@@ -168,6 +169,8 @@ enum cifs_reparse_type {
 static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type)
 {
 	switch (type) {
+	case CIFS_REPARSE_TYPE_NONE:
+		return "none";
 	case CIFS_REPARSE_TYPE_NFS:
 		return "nfs";
 	case CIFS_REPARSE_TYPE_WSL:
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 5a9a5e04fb051..821eb149e4b88 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -334,6 +334,7 @@ cifs_parse_cache_flavor(struct fs_context *fc, char *value, struct smb3_fs_conte
 
 static const match_table_t reparse_flavor_tokens = {
 	{ Opt_reparse_default,	"default" },
+	{ Opt_reparse_none,	"none" },
 	{ Opt_reparse_nfs,	"nfs" },
 	{ Opt_reparse_wsl,	"wsl" },
 	{ Opt_reparse_err,	NULL },
@@ -348,6 +349,9 @@ static int parse_reparse_flavor(struct fs_context *fc, char *value,
 	case Opt_reparse_default:
 		ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT;
 		break;
+	case Opt_reparse_none:
+		ctx->reparse_type = CIFS_REPARSE_TYPE_NONE;
+		break;
 	case Opt_reparse_nfs:
 		ctx->reparse_type = CIFS_REPARSE_TYPE_NFS;
 		break;
@@ -1828,8 +1832,10 @@ enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb)
 			return CIFS_SYMLINK_TYPE_SFU;
 		else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext)
 			return CIFS_SYMLINK_TYPE_UNIX;
-		else
+		else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE)
 			return CIFS_SYMLINK_TYPE_NATIVE;
+		else
+			return CIFS_SYMLINK_TYPE_NONE;
 	} else {
 		return cifs_sb->ctx->symlink_type;
 	}
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 204643428068c..2ccdda350267f 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -43,6 +43,7 @@ enum {
 
 enum cifs_reparse_parm {
 	Opt_reparse_default,
+	Opt_reparse_none,
 	Opt_reparse_nfs,
 	Opt_reparse_wsl,
 	Opt_reparse_err

From a314f52a0210730d0d556de76bb7388e76d4597d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 20 Jan 2025 16:59:14 +0900
Subject: [PATCH 274/368] kconfig: fix file name in warnings when loading
 KCONFIG_DEFCONFIG_LIST

Most 'make *config' commands use .config as the base configuration file.

When .config does not exist, Kconfig tries to load a file listed in
KCONFIG_DEFCONFIG_LIST instead.

However, since commit b75b0a819af9 ("kconfig: change defconfig_list
option to environment variable"), warning messages have displayed an
incorrect file name in such cases.

Below is a demonstration using Debian Trixie. While loading
/boot/config-6.12.9-amd64, the warning messages incorrectly show .config
as the file name.

With this commit, the correct file name is displayed in warnings.

[Before]

  $ rm -f .config
  $ make config
  #
  # using defaults found in /boot/config-6.12.9-amd64
  #
  .config:6804:warning: symbol value 'm' invalid for FB_BACKLIGHT
  .config:9895:warning: symbol value 'm' invalid for ANDROID_BINDER_IPC

[After]

  $ rm -f .config
  $ make config
  #
  # using defaults found in /boot/config-6.12.9-amd64
  #
  /boot/config-6.12.9-amd64:6804:warning: symbol value 'm' invalid for FB_BACKLIGHT
  /boot/config-6.12.9-amd64:9895:warning: symbol value 'm' invalid for ANDROID_BINDER_IPC

Fixes: b75b0a819af9 ("kconfig: change defconfig_list option to environment variable")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/confdata.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 4286d5e7f95dc..3b55e7a4131d9 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -360,10 +360,12 @@ int conf_read_simple(const char *name, int def)
 
 			*p = '\0';
 
-			in = zconf_fopen(env);
+			name = env;
+
+			in = zconf_fopen(name);
 			if (in) {
 				conf_message("using defaults found in %s",
-					     env);
+					     name);
 				goto load;
 			}
 

From a409fc1463d664002ea9bf700ae4674df03de111 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 20 Jan 2025 17:10:31 +0900
Subject: [PATCH 275/368] kconfig: fix memory leak in sym_warn_unmet_dep()

The string allocated in sym_warn_unmet_dep() is never freed, leading
to a memory leak when an unmet dependency is detected.

Fixes: f8f69dc0b4e0 ("kconfig: make unmet dependency warnings readable")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
---
 scripts/kconfig/symbol.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index 89b84bf8e21fa..7beb59dec5a08 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -388,6 +388,7 @@ static void sym_warn_unmet_dep(const struct symbol *sym)
 			       "  Selected by [m]:\n");
 
 	fputs(str_get(&gs), stderr);
+	str_free(&gs);
 	sym_warnings++;
 }
 

From 35fcac7a7c25cc04f730b9570c737f31295fa92d Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Wed, 29 Jan 2025 20:06:52 +0800
Subject: [PATCH 276/368] audit: Initialize lsmctx to avoid memory allocation
 error

When audit is enabled in a kernel build, and there are no LSMs active
that support LSM labeling, it is possible that local variable lsmctx
in the AUDIT_SIGNAL_INFO handler in audit_receive_msg() could be used
before it is properly initialized. Then kmalloc() will try to allocate
a large amount of memory with the uninitialized length.

This patch corrects this problem by initializing the lsmctx to a safe
value when it is declared, which avoids errors like:

 WARNING: CPU: 2 PID: 443 at mm/page_alloc.c:4727 __alloc_pages_noprof
        ...
    ra: 9000000003059644 ___kmalloc_large_node+0x84/0x1e0
   ERA: 900000000304d588 __alloc_pages_noprof+0x4c8/0x1040
  CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
  PRMD: 00000004 (PPLV0 +PIE -PWE)
  EUEN: 00000007 (+FPE +SXE +ASXE -BTE)
  ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7)
 ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0)
  PRID: 0014c010 (Loongson-64bit, Loongson-3A5000)
 CPU: 2 UID: 0 PID: 443 Comm: auditd Not tainted 6.13.0-rc1+ #1899
        ...
 Call Trace:
 [<9000000002def6a8>] show_stack+0x30/0x148
 [<9000000002debf58>] dump_stack_lvl+0x68/0xa0
 [<9000000002e0fe18>] __warn+0x80/0x108
 [<900000000407486c>] report_bug+0x154/0x268
 [<90000000040ad468>] do_bp+0x2a8/0x320
 [<9000000002dedda0>] handle_bp+0x120/0x1c0
 [<900000000304d588>] __alloc_pages_noprof+0x4c8/0x1040
 [<9000000003059640>] ___kmalloc_large_node+0x80/0x1e0
 [<9000000003061504>] __kmalloc_noprof+0x2c4/0x380
 [<9000000002f0f7ac>] audit_receive_msg+0x764/0x1530
 [<9000000002f1065c>] audit_receive+0xe4/0x1c0
 [<9000000003e5abe8>] netlink_unicast+0x340/0x450
 [<9000000003e5ae9c>] netlink_sendmsg+0x1a4/0x4a0
 [<9000000003d9ffd0>] __sock_sendmsg+0x48/0x58
 [<9000000003da32f0>] __sys_sendto+0x100/0x170
 [<9000000003da3374>] sys_sendto+0x14/0x28
 [<90000000040ad574>] do_syscall+0x94/0x138
 [<9000000002ded318>] handle_syscall+0xb8/0x158

Fixes: 6fba89813ccf333d ("lsm: ensure the correct LSM context releaser")
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
[PM: resolved excessive line length in the backtrace]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index 13d0144efaa3c..5f5bf85bcc905 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1221,7 +1221,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
 	struct audit_sig_info   *sig_data;
-	struct lsm_context	lsmctx;
+	struct lsm_context	lsmctx = { NULL, 0, 0 };
 
 	err = audit_netlink_ok(skb, msg_type);
 	if (err)

From 101971298be2aa4706c8602bd81066a0f6f2ced5 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui@bytedance.com>
Date: Wed, 14 Aug 2024 14:26:25 +0800
Subject: [PATCH 277/368] riscv: add a warning when physical memory address
 overflows

The part of physical memory that exceeds the size of the linear mapping
will be discarded. When the system starts up normally, a warning message
will be printed to prevent confusion caused by the mismatch between the
system memory and the actual physical memory.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240814062625.19794-1-cuiyunhui@bytedance.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/init.c                  |   8 ++++++--
 scripts/selinux/genheaders/genheaders | Bin 0 -> 90112 bytes
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100755 scripts/selinux/genheaders/genheaders

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0e8c20adcd98d..9641e4ad387f4 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -256,8 +256,12 @@ static void __init setup_bootmem(void)
 	 */
 	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) {
 		max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE;
-		memblock_cap_memory_range(phys_ram_base,
-					  max_mapped_addr - phys_ram_base);
+		if (memblock_end_of_DRAM() > max_mapped_addr) {
+			memblock_cap_memory_range(phys_ram_base,
+						  max_mapped_addr - phys_ram_base);
+			pr_warn("Physical memory overflows the linear mapping size: region above %pa removed",
+				&max_mapped_addr);
+		}
 	}
 
 	/*
diff --git a/scripts/selinux/genheaders/genheaders b/scripts/selinux/genheaders/genheaders
new file mode 100755
index 0000000000000000000000000000000000000000..3fc32a664a7930b12a38d02449aec78d49690dfe
GIT binary patch
literal 90112
zcmeI54VYWidFQXqM}!HsAV3BA;1X~VGz`MnRD8IRv5n=#7zLYDVcSUZjK)$tBMC`k
z%cLnHP^yXRL`~8@lP2BHlcn8hyWI-frUOqCluw3sHxX$*)FdTJmIS#ZsnShSku1!<
z=ic`(>;7Xu*(8vziNu57dw%zvbI*P6@7#MeGt#|t>y8^&u2^B=b&~ZfmMPUg;gX21
zh{TW9iCAIl3@c(?V7<aRRo0&@{}1Y+zQ*=ScLC9-{3MB{UBE0HBfiYV79zH8qG@-$
zSK|S94Wi|D%ck8aX0d7hkyq3CcMw<Tzz~PqIooS#eTua;E=L@0XL8ee!d=htFZa28
zNh8{sbeVRYo7_GAp{R-IXhc5E7|s7-%_ql*tTV5O^RH!byNb5sXls2$Cl|tYeXTt4
zlWZ@h?c$HS9dlf-+e5_mUMKBLUjAQSdf2U7bbFkCltY?-L`|!8#Z-3B)$6aAnz(!_
zo13XzUI|})`PJ*kO8K#M&JfJFLh`?HYTM3Rt(8@)X_%%_=FkeKjQ&<?S*kcMQ`d~q
ztT*k=%e2+$;>5G0Y}Pybz2YuevQ`@Q68ZnJ^e3mU`L{!u9%h~AW!jm#{CcH;WAUBG
z;qN*Q|DteP{^!@70*=-HHOJxi9*6(Ca9jT8*Es@?)&Et;;g=kT|ITrE<T(6PNvJLB
z`1L9Q#~RQ6<M6%0%`xCtQoxzk`BpV18<H31r<F*Imk$=wiE@6XP)HXOQ~7aWg<>{W
zo=l8q_F0p~blOT+vSq7OE>6r8tjT;Koil|g35mqyP~AjAv>s2C%Bf;GF`deqY7^;V
z(QKuXN=#;Rsj2K8Y13F}zGzL3PvuK#YqBs?9=9eB6tm^D)RlF)yctQVoXzJf)2Epk
zS6kb5Y`<Z1V%^wPW9yGDA6;BGw&5tY65Ds*l#s2b_hw7wbaD4ho2T-*^zPK2DYK2e
z)A^hZG~re~W<@h&PLea<NitJTa{gN(bJATpWx&kZldbzCuQ)H1&&!@>X77Jj_Se7r
z`!dZ=wjR>*%=u>hN!G`;{E}?;4707rq-<U|f8aP39&vg52{(d=ly`}rrycc(2U>3X
zK5<*?`@ZJw|Mbtf<pA-^w7yOJ3gtoK-!KzeULoSwYW*<ri1G;Wy<c+sStPzq>o<vG
zf0u~gto2*OZ&ThTo>kr<UQym9ey{Q#@pmfk6Mw&Q>+A0R;rRZgav$-Bl>3Q)M0tSt
zCzad8A6FhE{x8Z~#Gh8)Ccfg+?zlU|ga7FAF7Z>fevkNS<$dDkDz~2W=I0g4eZ+rJ
zxu5v9&$#Ua#4p$SHu0;K2Z_I0d5HLHl!uAmqC7(UHsw*`DdjQZ8Rc=}Ips;>`;}*i
z|EBT+@wX|j5dS^pRpRecUL*b|%Im}%%IAoGNclYRN0l!Se@uCU_!pHg5`RK@llZrl
zFA-l>-XVS(d?tRD@*eRE!Mk2>|3N+M6TeF9TSvV0uUGCP9#!rqzDs$4_#Wjp@!OSK
z|Ki>5Ta^2V*OdE-zg>BN_<NPx#Q$7*koW`2L&VS2;}RzRpw^ENZz_)xe?)nV_@|V|
ziGNvnl6X&fhWN9}3&eeT+$+TW%B#fBQ(hx}k@7n6G39f_!^-E0Z&toQe5djT@woCu
z;(L@giSJXsM7*rLMf@)1ZQ^fJ-XZ=T<z3=`q`XJ`kCpd{->=+y%Ip8b%KgM2Q63<E
z@Uw1voA{@-evtSVl!u7pd>$tLRjnT({w?KE;(g^Y;wyEWapI>dPZB>H`}GZPesF&n
zB!0fu4-wz2^CC=q&*$BKBh2-FBT5|ki4nK4U&PPWJS2%DpBduFXMuQ7`>zncLV1<=
zRmy9`Hz=<YzgGDi@lDF-iEmfFK>T&e8^qtBe3AI1@+R?p%9n`4hZgaY)^8KPQ+bDY
zO?j92obn#=`;_;I|Bdp{H@$v#l!uAK&j@k&86^%sW6aghIC1!yBo03_#NlUwIQ*;-
zho4pA@Uuo7e%6V@&pG1obDlW-Tp$iV8^qz~B60ZHBo059h{MkoaroIL4nI4@;b)gP
z{Ol2jpMB!+)B2Xzx2F2$BmSszKk?5h4-mgk&yzOs$F+Wh_!G*b#Gh0i`&aKcpHdzt
zj(L$Jj(L$Gj(JgFuJfWo9P^?|9P^?^9P^@19P?t1IOfGXam<Sa;+Pi=;+PkU#4#_L
z#4#_Hh+|%~h+|%~iDO=Lh+|%KiDO>$h+|&#iDO<^J+BX#7e3;c7k=WH7Xjj!7fIsZ
zQQtDe;ah<?e5(+LZ&l{%Ta7q;s}qNBbHw4>JaPE8Kpeg`h{Lx<;_$6W9KJ0Phi@(7
z@U2Z8zIBMhw=Qw`)*}wz`o!Uz^=+>|@XbdYzWIs6w*YbYW)p{RLE`YOK>Qi?twJ2W
zRf)s58gckmXa1PG|8vCQ+dOgjwm=-dHHgEvMdI+SNgTc{5r=Or;_$6a9KLmk!?!MR
z_|_v1-}=PioAqyAf8d*sIDGRHhi?Jm@XaO;--5*9TZlM(3loQLRpO`W&&g}V;ai<J
ze48T<-{zS==H_dGIDBgmhi{9-;aih9d|M(8-&(}sTbnq1>kx-;UE=VqM;yNOiNiPR
ze|!CbZ$9Gi%}*S@1&G5pn>c(65{GXg;_xj@9KJ<}!?!4L_*N(GSKsD{!?$_j@NI!O
zd}}aQ-xi6(w<dA;wnQAhwTQ#FHgWjYAr9ZV#Nk_yIDG39hi}$*y#By9A948RCl22N
z#NnGw9KHpK!?zG|_!cG(-y+1}Ta-9_ixG!!apLf8o_I-rf44v!zBP!$w?*Rct;t+{
zTOtnMTEyX7n>c*y5QlGF;_$6U9KQ95!#C@@UVpMWAAQ72%KgO8R~{gKp>mseP<fE}
z70N@zuT~x={%YkB;!)*M;x{XA5PyU6MdI+GNgO^b5r+>g=ITS6IDF_3hYwxi@S#T>
zKJ<yh2kU#@;|m{r#NmUVID7~YhYvP!_z)xxA40_8Lzp;xh!BSlQR46+MjSrGiNl8^
z@rlp5`N|N_DQ^;gv+^b4@TWx_{<Mk1pAK{Nr%N3E^oYZsK5_VCnLikk7k)1fe|*H@
zkDoaF2@r=rHu3D|-0cO4!=Dgw_!B06q4pCY4u7J=;ZKY>{D~8XKS|>7Cqo?m6o|v0
z7V&#^{B7d!q(dB@bcw^09&`1iPaK|DefK!vxWf}4ad_e<4o?Ea;fYNgo&<@*lMr!u
z5+)8$BE;cIlsG(z5r-#n;_xI%9G+x|!;=DWcv2w_PpZV>NryQ8E~rZ!+wBp@c6(2I
zk1zfnsZab}>Y??0Z~gaz|G>-NuiQr*-uj8d+W>KRYcp4GgT&!&h&a3r6Nk4E;_x;~
z9Nxx=!`nD<c$*{+Z!^T<ZGkwvtq_N|RpRirMjYPOiNo7D;_zplIQ&^44sZSc$LquW
z>O+7ye6WebhahqI5Mr)Ago(q42yyrjB@Q2A#Nk7nIDAMFhYuOz@S#8)K2(UqhbnRS
zP$Lc>>crv09C7$CPaHlh5Qh&9;_zXSIDD|5@%r$v`Vb@zA40_8Lzp;xh%i?lqQv1t
zj5vIV6Ne8;;_x9u96l6?!-ooS_)sMdA8N$mL!CH$m?I7!=840H1>*3bK^#6T5{C~>
z;_zXKID7~_>-FL9)rT-~_z)ouAELzJLyWol5GM{FlEmRdhB$mE5Qh&H;_#tL96r>D
z!-qO?_%KHtKFkw`4-3TMLxVVcSR@V~n#AG55^?y@A`Tzg#Nk7S_&=);k!7zZPb!ZR
zhbJ-O@FY$go+O#8CmG`Kq(B^=REWcqDsgyHBMwjM#No*tad<LM9G)x?hbIl<@MMuV
zJZTb#CriZPNsBl<X%mMh9pdn$OB|lW<dX*Gh4&**s}FJFd-OOai9e(DGsFkSm-yiL
z5{Fk+;_#|Q9Dj#aCw|K3UBBmuuTh@V&kkaLH^Wcj=WG1}@e9CLdh>RP@(OX}tx6nu
zs}V=u>co+^IpWCMJaOc0fjIKkAdb8(5=Y*e#9yWRTRq8}hxN*9#F2+OapYl+IPx%0
z9C=tEjyyDoBM*zjk%uO6<Y9?8^3WoV^HTj}Zyv&$hdJWN!#r{1VSzaE&>)UHED}c^
zn#7TZCF00Ki#YPoCXPJJpW@x$>vew@h~qlaAdYb^62~~3%yphF5l4Pn#F3viaU8D>
zaa>2b#Bm+z5yy3;PaM|~%X|QD=)8*Sh>tj~BYxtzjs%F~I${&YbtFg}*O3r$Tt~vh
zaUF>e$8{u1d~h8h{yOy`PW;!DH;8AHFA|48P2%upi8%aeF;{=u#NkheIQ;1nhd({y
z@TX54{#Y;a`T&1?#Nm&hIQ$6^hd(xP_!A@!e?r9JPnbCTi4ccBQR46?MjZaciNl{H
zarl!V4u6`&?^J)5h{K;2aro0F4u3k#)t@eL_|qc}fBMAXkLC0F0DpYM;g6p<{0R_;
zKQ?jr6C@6QLd4-um^l225Qjfe;_xR%9R9?K!=EH^_>&<He_F)ftv<Ag!-o!W_|PQ|
zA9~Eyhdy!mV4dbYzVN|E96tDo!-oKI_+S%<4?*JaAw(QLgo(q42yyrjB@Q2A#Nk7n
zIDAMFhYuOz@S#8)K2(UqhbnQLPin+*KIsr&P=C6_;ZKh^{OJ>iKh`R*54aEX5r;p1
z;_xRx9RAqE;ZKk_{0R|<KVjnVCqf+lM2W+n7;*R$Ck}s-#NkhdIQ%IPhd&kK@TW=~
z{?v%WpC0jt)rUTD_+XvxJ-+b4M;t!*nd|u^KpZ~U#Nk7bID7~ZhYw-m@F7ARK17Mb
zhZu4A5GM{FlEmRdhB$mE5Qh&H;_#tL96r>D!-qO?_%KHtK3FgI9^cQX4?g1X!A~4M
z1c<{2o4NWBBn}@!#Nk7jIDCi@hYwNW@F7MVKE#Q`ha_?MkRc8q3dG?<g*beu5{C~p
z;_#tP96rnuhY$0_;ll!P_|PDZ>&qf>Twj{RkElOO#J{V&Mf~Kp`*SV-8D0<1RvsV@
z4{hS`Fi0F8hM22|VdC&GLL44OiNnJfad;Rf4iA&W;bDe2JS-50hZW-Ruu2>r)`-Ky
zI&pY7M;so`6NiTj#NlCsI6Pb=4iD|sULVHPhahqI5F!pA!o=Z2gt__<B@Q2A#Nk7n
zIDAMFhYuOz@S#8)K2(UqhbnRSP$Lc>>crv09C7$CPaHlh5Qh&9;_zXSIDBXlhYw4{
z;X`PR*N1KDLzp;xh!BSlQR46+#$0`f6Ne8;;_x9u96l6?!-ooS_)sMdA8N$mL!CH$
zm?I7!=840H1>*3bK^#6T5{C~>;_zXKIDBXkhYxMy@FC*&`jAu~qQv1tj5vIV6Ne8;
z=ITR+ID9A&hYuCv@S#c^KGcZAhdOciFh?9d%oB$X3&i0=gE)LxBn}^%#Nopdarn?8
z4j<aY;X{WweCQH~53w`7K9tpmIC1ooB#!N7h*!0r0`Wu2lV^Fi`)=hK;_$FQ93EDP
z!^0|b^{_@99@dG&!#U#caGp3kTp$h)8^qz^B5`=wBn}Ukh{MAcad_A!4i7uT;bE6J
zJnRvNhkfGk!Fq}J_`)9_ad=oD{($;WAr2p^#Nk7YIDDuxS0Cnx!-sj|@L_>Cd}t7d
z4~xX%Lz6gsSRxJ|TEyW)n>c*v5Qh(4;_#tI96t1k!w2hZ@9~8XKH~7fPaHl3h{J~}
z@kiB%8gckgCk`Lxh{K0@=IX-&arn?64j&eY!-pnu_^?DAKD3C#hc<Ee&>;>Vy2Rl_
zk2rkj6NeAhIo{(7AAH2&gP%Bj2oQ%4HgWh6Bn}_y#J{dS%n^qV^Tgr90&)1zV6Hwa
z5{C~>;_zXKIDBXkhYxMy@S#H-K6Hu0haPeG&?gQbtaH7`7e4ri!v{Zc_z)ltA8g|A
zAxIoPgowk3Fmd=WPyGAp!vb;m&>#*U7Ky`$CUf;+i8y>{5r+?L;_#tE96of3!-pPm
z_|PW~AFN;S9$)z2BMu+@#Nk7LIDD{)!-pVo_z)rvAHu}pLxebdh!Ten4dVWPa(_Oy
zNE|*iiNl8_;_#uxTzzO0hYua%@S#f_KJ<vghdy!mU<JI#7e4ri!v{Zc_z)ltA8g|A
zAxIoPgowk3Fmd=0Ar2p+#Nk7XIDCi`hYwBS7pf0S#PN51E#g6~-zJXycZgrE^}EEw
z%3J4okJlHz==#|vzD4VIh{Mk=aroIIj_-Hs6CZp&{e16u;HQr`elOV}z8(8Z9P_J3
z9OLg3$M~(6ddGw9`iNtG`H5rv0pgfnHgU|aAaNYu5OK_}FmcSU2yx7>C~?fM7;((6
zIC0FcByr5I3~|h_0&&c*3USP@DsjxO8gb08I&pk%ZjLxUpW7pTtNPO?o>XqV-0Q=e
zl>3Ndp88+mt)JKW0pf2~ZWFI54-$W;@(}U&C=V0APkDs+1InYsKd3xL{Nu{w#2-~|
zU*O%}FDVZae@c0X_z%EsZ$GDh!96d9iDO<zh+|$yiDO>JnCo>jP8{<(NgVS!LmczE
zKpgYBLLBqDN*wdLMjZ3HP8{=kjyUG^JaNqH1>%_34dR&Bi^MUno5V4%mxyCtw}@k2
zw~1q3cZi>>K143`dUBESDDlgb$B198JWl*N<w@eNML)0fZg)5OA&&2hP7=rWMQ4cP
z`=SfP@qN)1;`qMkD)BQv?ar4P@dy9W<#potzUVpP_`c|Q;`qMk1>*R==mv3oU-Tkz
zd|z~vIKD4>i8#J5x<&lF&**&%aeQBNhxnM*?-CCy?-9rMMfZvC)cV#%ULWGheZ==D
z_Y=qWMF)tNwZ2XKF6BYu_`c{6@%L!`FmZfebc8s*FFH#6e(fhl9N!n6B%aWDks*%j
zU4b~RcNOBe-c^a?dRHTk>s_5Vu6J|9alM--j_ch5aa`{T7kiKUKHc96aa^aW#BrUf
z5yy3^&RnlkbHs6-nkSCy)B<r_ry9g@omwQ0>r|6Cu2W0Iah+-r$91Yr9M`E1aa^am
z#BrVK5yy3^PaM}NYpwTq;X36bj_Z`4IIdFx;<!%P#Bu)_B#!&f5OMsTt4jQUdQ~G1
zuj<6%)f{nnHP2kVS|ARu8pPq%B5`=tBo42Zh{LNEad_1x4zD`I;Z>J7yy_8$SAF8}
z%KAmGC-BNg9A5c}!>a&scx4lZS3%<NDnuM!)rsG&p3D)4C-cPN$pUeB(qOKhEE0z&
zP2%umi8wrI5r-#j;_#$H9G-NE!;>Cyc+w{hPpnJ4#~q&dh{F><ad;9S4o__2@FYka
zo`i_QlQ403GEe*+>cawY_|PB@9~OzjhbD9NVTm|=Xc31GZQ}5uLmWPIiNl8;arn?D
z4j-(b_xQpGA948LCk`J1#NmTY96khz!-o)Y_z)%zA0ouzLzFmtXb}Gc^<j}Xd}tDf
z4@<=1LyNik&?XKaI>g~ampFXr5r+?b;_$(`)O&p4gO50T@Dqm*0pjq%CJrBh#Nk7T
zID7~bhYu0r@F7YZKE#N_hd6Qg&?LT~J}eQ34=v*Gp-mh<beO9TUE=VeM;t!%iNgo$
zGVk$)4?g1X!A~4M1c<{2n>c(35{C~V;_x9%96m&d!-ptw_z)uwAL7K}Ly|ar$PkAQ
zE#mj64{hS`p+g)#bcw@<9&`1fPaHm2zvMl>@WDqMKKO~lhX8T-U=xQALE`WsL>xYZ
ziNl8oarh7=4j*E~;X|A_d`J?94;kX{p+FoyREWce4)G7D4_)H$p+_7(^ohd<>s8+4
zJNSJHarodT4j%%<;e$;aJ_L!whY)f25GD>EBE;cClsJ5d5r+?P;_x9!96n@-!-oQK
z_)sAZAF9ORLyb6m=n?;j`p_p1AFRv0#}_{Mh{Fdzb3H!<h{Fe)ID7~ahYum*@F7eb
zK17JahbVFQ5F-vB;>6)Yk~n<G5Qh&1;_#tD96nTu!-pDi_)sSfALfX|2W!lGd|T>+
zk2rkr6Ne80;_$&{u08~b!-o)Y_z)%zA0ouzLzFmth!KYmapLeHNgO_8h{J~harjUn
z4j-z-;X{o$e5ezL4|Bxf!#r{Lus|F>_^<H#@VNRAAPygF;_x9z96p4Ys}Et~@F7AR
zK17MbhZu4A5GM{FlEmRdhB$mE5Qh&H;_#tL96r>D!-qO?_%KHtKFkw`4-3TMLxVVc
zSR@V~?2y-oBkDttID7~ZhYw-m@FBuneTWi=4>98KAx<1VB#Fa^3~~5SAPyfY#Nk7g
zIDDuPhYxk)@L`TPe3&N=9~OwihX!%@ut*#}G>OB9CF1ZQbfwpa@2U@B;_x9t96m&e
z!-p7i^&w6iJ|v05hYWG}P#_K;D#YPKl{kE;5r+?T;_zXPIDD8V4j&eX!-ocO_^?PE
zJ~WBLhb7|hp+y`%w28xq$U3hNEA{WqqQv1tj5vIV6Ne8;=ITR+ID9A&hYuCv@S#c^
zKGcZAhdOciFh?9d%oB$X3&i0=gE)LxBn}^%#Nopdarn?84j<aY;X{WweCQH~53#Gf
zKAfRG#EHX)BysqVAr2o3%+-eqarjUr4j*d7;X|D`e3&B+ALfa}hXvyBp+OuzEE0zg
zP2%uji8y>{5r+?L;_#tE96of3!-pPm_|PW~ACl|6J_OW<3~~5SAPyfY#Nk7gx%yBe
z4j<~o;lmtp_%Kf#J}eN24-Mk*VUaj|XcC7HOT^(ri#U8}6Ne8S;_#tM96t1j!-qa`
z_+V}D9$)z2BMu)5#4lDKD#YPKl{kE;5r+?T=IX;7ariJ#96l@%hYt<n@L`cSd}tDf
z4@<=1LyI_kXcLDI9pdnzOB_D*h{K0Iarj_e?LEHm!ABfE_=&@Z0CD(GCB8v@s1b(`
zb>i@0jyQanXRbah5Qh&9;_zXSIDBXlhYw4{;X{i!d}tGg4;|w0p-UV-^oYZUK5_V9
z{j&G?!UrF5_~0iF9|FYTgH0Sh1c}3kI`K{F!yIw=Fi#vlED(ne4d&{@B60Z8Bn}^z
zh{J~#arn?C4j($i;X{`=eCQE}4}IeB!3ukiFMRM3hYx<@@F74PKG?+JLy$Op2oZ-5
zVdC&%p7>7nVSzY&Xb^`Fi^SnWlezk^L>xY}h{K0Aarn?74j;P2;X{u&eCQL057sr_
z;|m{r#NmUVID7~YhYvP!_z)xxA40_8Lzp;xh!BSlQR48SLHsuLVUaj|XcC7HOT^(r
zi@EyHCJrAu#Nk7iIDF_4hYx+?@WHy)dwk)8k2rkr6Ne80;_$&H4j+QV;X{Zxd<YYV
z4-w+<Axa!R#E8R(IC1#UB%V<pmWacL7IFB{CJrAu%+-f3arn?94j=l&;e&OZ_xQpG
zA948LCk`J1#NmTY96khz!-o)Y_z)%zA0ouzLzFmth!KYmapLeHNgO_8h{J~#@jKLq
zHgWjSAr2q9#Nk7ax%$v24j-)Ry~h_m_=v*?KXLdFAPygF;_x9z96p4I!-p_&_z)ou
zAELzJLyS0lh!ckoN#gJ!LmWO7h{J~parn?7ey{q_B@Q2Y#Nk7qIDD`+dXMkm?;nW6
z2S0K65Fid8Y~t`CNE|+dh{K04arh7+4j-b#;X{l#e25c=4@u(iAwwKK6o|uz3UT;Q
zB@Q2I#Nk7a`0uI@ed6%J`W5f-g%3XB@WIbq&kq6O@WCbyAA-c;Lx?zh2or}75#sP6
zN*q4Kh{K0CarlrV4j(ea;X{Eqe5eqI4^`sup++1&)QQ7~IpXladbRiXzE6Gd5r+?c
z;_x9r96s30)rTN)_z)rvAHu}pLxebdh!TenG2-wcP8>cYiNl8sarjUm4j(GS;X{=;
ze5etJ4|U@3VU9R_m?sV&7Kp<Kf5hv<pQ;Z5;_$&H4j+QV;X{bI`Vb}#A0ouzLzFmt
zh!KYmapLeHNgO_8h{J~harjUn4j-z-;X{o$e5ezL4|Bxf!#r{Lus|F>G>F58MdI+m
z-sJV+uhoYjarh7-4j;nA;X{PE`Vb`!A7aGeL!3B#ND_w+8RGDvKpZ|)h{J~}arjUp
z4j<~o;lmtp_%Kf#J}eN24-Mk*VUaj|XcC7HOT^(r=mxJ3A5|a1#Nk7PIDCi_hYvC4
z>O-73d`J?94;kX{p+FoyREWceDslKwBMu+x#NopnariJ#96l@%hYt<n@L`cSd}tDf
z4@<=1LyI_kXcLDIk<DHoKB+!Li6=hmex5zH#asVztsf`;ZRN?W-un3ba)$V_)-MoW
z^(A+^72+>ZUM2o=<u&4$C@<XT9Zv}T6Nmp*;_$yl9RAmttN(Mv;r~2w_`g6L{x^uj
z|3%{PzeybaFA<0TE#mOMO&tDrh{OLbaroaO4*&ba;lH)bd%WPkk2w7I6Nmo+;_$yp
z{5tqU96r>E!-qNI@L`_0`mjJ8J~W8KhehJ>p-CJ*ED?tfE#mN@O&mUSh{K02arn?9
z4j=l&;e!?R9$)z2BMu+@#Nk7LIDD{)!-pVo_)sUlO?{XnKB?!!dE&Qd{RQIZ>2Yrm
zf4$aUBo42d#0Oq&_vS5y?GlH#4dUp3kvRHqGS~4h5r?-e;_$Xj9Nu<_!`m)#c-tcm
zZ~MgIt@Rpjey-Bv<s**odGiy$R_h0dZ&Gd(->y7J{B_Dh#NVJiOdMWCi0{+-QQ~JS
zj}b2^j}yOBd6IZdd4~9$@&@rKcuRand6W1(%9n^ALjBizkJmd;pE!O#S%`V-qujjN
z<)_MR!%6O~gY)mw5I<#zw}$v>L%co2*9`H_5I<{(S9Lt5@0Sd5OZkh0xg&E|`iA(*
zq5A$Ie)13x3~{q>c-ceTYy+?05XUo{&AQMKUxmsN!$bV^As!jxFCOC2A%4aXj}7tF
zLp(mj{X;xC#Lpb!qs37MMj04oV3dJT21XeeWnh$nQ3ggC7-e9TfuAe`(YwFliyk`V
z<L|Jn=x;U4E0<f*yFcdp=%BOZ4Sz4Ime>9d`FG6)5m_?jj9GuAvn;Q*A2DU~7WPQX
zDL-V&=B?|Irc-{vl+9c2BMqngXQpi4!XBA-%I`O2^Y-{i-6{WpDVw*lN2*Twou+Kw
z!X7C&<+qu#dFy&4>6G7U%H}Qlk(g7y!<5Zi*dr0ATr_3#*7ZopDZj~-&0E$Zwp0GX
z+bwIdE@kJn=~p+se$%a+cHg>7G82p*IyX97zStj~J!f2+-Mzf>&X+%I_BDEV!=9H~
z)|JifYo!whbyKnwJ+wt?AKvg+@-u+wp^C42y@9hImE5km=Wk`Nqq84==vwLEL#CH2
zzq#Y^hBrw+I}Weiyw<Wd&3<#o?7#0gyz#f?Epqol)AaDhx5@hG>_0uU0j;_|S+Qew
z;$oky-7PiEaq0e*q;S*SA78O!_WRM<FUw}4vmf6&+jY0o+<w=`{IbRFKR;`R^RL~l
z61N}vyNi3K{9DU2%^inKf3uH2Y%;g$gJyKyq*;0R@&{jO2DovJY{}W`nhUmA)@u&^
z#y7>7<#Moljci#qDI1e*Z=d~`Y+JY5%}EbCW}obS?ityf3~u|~&%A8rV-Lz6xe1VF
zS2m-E&v{IKRN#!Dd%fAF?Cq}%svO?fJYNR$5?x`;a=MqD|Ee_WeqZ`FEzTKQ@rP!`
zSImmT8~>{;oAm*#?=HD3&XW~SU9{$cs;rP2|B4)4Y6g2WH|3q(Uv&=LBa+eX^|JEr
z{wcrtsp#CHbAI`-G(5cJl<!~TpZU<5zuoXlQl7k~aPrLk(b*%iTU(t?%Pu+t?k1(}
zQ@?M<vIb+3@vS$N4qv`eLU+uJOx7EnWkhbX^AwsRgFQE6?^?&I{nY<)yD&2hwZA92
zcR#Hw<S-6aeATRYP*)6Rwry6-n-zyQeoB_jpv;_eNA%lHi!XOCkSHgO>$|7Q!l7{Y
zj|@MwG59-!Eu6FEJj>cX+jQr}p{P%0%p0WE!)AEhE1df7jJZp4A~VL{T_te)?8luM
z^I`GOGh<|WbWf8CJ7$kOv<>5tF`gouktBZ0mR^tiK-M36i*?g)Ew8!fi&AOF>~i<r
zvUky0IfNhW9+dUbLvQgJdw*lo-(BL=_>GGLo1C57bm_;on7Os}$|r7`ee$U@)sO9W
zKO*)n+vPWQw@X)BW}h}^&K+XStkmzx!M<P4F-{Nv*tu!;E0VC-7C9LNWQQ|mKr#zj
z-(OyqdGm}vew&$d|IN(PyFTvB%fFFTN4Dt+B(Qz9FC88fW6eB1wDI$A)2^knmr7?3
zo6}|Y+tNmMJtW&T={S0Ly0St>)~!l(4$od$ki%m+JHBxiy~#HI_!m5f$1fXrNGd;U
z&cEHdEM2+Wot4nN*zMwj5@bL(4t4R?b3I*{BYZ$=J#2L9-elT#pLRBXmuY+WoLjv0
z{YPFZyFIz)f_tRCIVEnNee8|V*{??LexeuKz4glGm5<6!rlsxjIcsJu>pPQUsB6x9
zvhG2l(Zi=aV8S*1@;P!j`&LbsW!Fzz?&$IRDCYU$jk8Yu4R1B!(r2S`@IL&lS4ThG
zKPkH66Vb<>EuSqNd{8^^Eq7#STPzFhs~cY@?W~zAZjIi(@jpu~IYeJ8zgTkeO5xqV
zkX`L&WUQY!<x(lH_;s{HJo09<w}V}%JJ%Us_f~0V27G;V&4&VVp1J3X<x9HtzDc&6
zmAgk996I|w(Yu=~YO!Tg^PVrxe9K&~rOJQ$^^eLR{=$s=&>0^v;qW;Z`OGECowNT-
zI(yh$hPw~SlI2|YrRC8py{sC&Mj04oV3dJT21XeeW#H!`11qeR*4omwww<3T+xba*
zIz62)9$aOW(&ID5Z26#d(L{POn@iifwr<|MW7Do(iM5wsbxp}$yKZ9D@Vf0gx9{Gy
zearCLRYOg&dgrY-*=wccX?ybdDZ7*(-)C>MCsR|U^mVI*mJVdg;~6`+FI~(f%Hvb1
zQt49r&9Y)VRT>^b=~`i@Y4dV%Mp{dGPcfa^XO?8hshO#=sq_}pZ82S*DdwEb$?CVP
zvQ}M`&P`+|kM6<RQbLA)S;-Dn)`r5BTUS|T2<a(nCO4JZlb%XXSlL{*Y^8Ds6K1pN
zvXx7hvy<lE>C#?nBE3I1Gc{$6=cfxZ<#Zyof3P?%qbjEdMM?0UbkQ2mq{sIq#`C#y
zx>B~L@~MeLAwQKJKRBpZOgmd1lxIrPtduUNbCdbvc-kzQB$_A(tB@+Drp>a`aWQ>+
zp_ngQvgtv;`%_cd2^mSETukLkmaHF7mr9m&w$CbM_l{?#CQRIymEDk7D(4F(I=f|J
zYRcLxo09&e9u^FdUb6X|Sy|Ydb=Emi%Ht{NLPI81Oq)8Xa@pODN_yO^oGfXAa`}|m
zkr}Bbo6F`hW^HjQJ1uE<gJBZ~bI$m)WwTAw`Ao5x&XujS^e4G>VyT=O-{&l4((VxU
zr4Q<6q=`9zT9N_jCNv-GtX!_(rpVnyE?=6-P7IT5VLXwQgJ6{omZtK16DG-6p3V;z
z<=B?;Q)x@k%v4%x-9D2pl~FWD+>-pH_GG8bG#k(459F+g)OaGlKV2+lC(=&AIe?{f
zsyLpJBXuAr3QWo#xM|jG%kWGh3MSiA+1yMek)58NDW~?x#zoJ>o@{O+A@c0ciZu=>
z%Eo0zISwaM6Vut8!;7f{mf5)}$&a((ZcuhJ;qHXl%Y@X*I{Tg&&lK|_nSlj4D8@;%
zm>M5<q1&@zBC3mWW(-a*=~8~i(O6cOv(qjumk*jbF`3<KP4CM|E~nBm@vYR%M7Eqb
zAm<}2n~76&<)1YBJ(-<KOSfeapeSds84+S8=SH1Jlf}GQ=gg^c-jXv*emrX?=It~2
za>^VxXF-Ip4y0rjS=sz}*_mCAv8h5qP8rs;Y)eiF+1x(qE0>aC8}DSsyRAh@XQL9`
zF-tDYB+E-<*HGDx)4gQW4ANaL?a>7}{bdq`ba7hC=J1!Wi9{~1b60#X9xUXu(m`rs
zBH@g~?DqaNmgRI{CPi_2A}hvBSaR~PrgHldCQss8(OuXxrNqn#7fog-^X}4AR?;c^
zA$w3Lo0*%JAf7oPGgF>0!WPrx`<={|Bs-E;GZaTSt2|y9oGJ4Y3Z2z*>~d*QWM*Pe
z!#Fi4WOI9qCTVFmh_NQ$=uGsIvoA_Audz6j%T^MlvZQPpNVg>8a#l|4m2)K4l|+>Z
znUKZ&L3gb)jh%X~Xl9Sh{-v|~WfGV6-fpFHGKFWv)XcP*9%jxsBP+_aGmU<8Q&QJG
zC%B7lZ>TG2naHO04r*%`xk+;-IeKMr`mIbB2g61CQd(BY6sM}$(s(J$&{YsMbzD=a
zgVxb87K+*ZS~PRdX))NZ$?P6E7v%TN3=R!Op^42Gd6QB>6f38HXvIXXI5=pSdpLvZ
zMNqDP)6UGpR`%uhyh%h!%%t~=%)F_+X*nBa#~GVMI9G^5{^%CbV{u|S#SO+~DwXq~
zGS8>vA|(?;^XB^Ih;FWx2U5ieIr|S5<<MLEv*`oY#Q0I+<vL_8zee(DIb$U5NEh=~
zA#E=EgERg*IhPEb>`h^Cz7!Ya1h7wTBc@VvF4$vE$cfTS$vFkNN7b=a&J>f=a<17^
zm~?L_oO6Q6Cl_zIjJmflgHykp0}`py(d&zQ33MiVNmf|pnH+XnPI;;EOv2UOTt8&!
z*=gBaDW7w0W>V%RL8hTO56cZqVtQ}Ua<V?@?qtH;t;k)5?C&}(m&VHjk+S2{gXL6Z
z-3DwpTNqYbPT5$IDo+>o4-R%QFZ-egXd*o>hZiNOR~o7}QNoIBabj@j3xz^{u$`%?
zLAT?nK?cg^La1BVGc_Zpy}X>76=Y||_hY$ODITqvElmu1C}i?O<I1Mf>2PS>l^fQh
z;dm-LiRIMP(aAqK*zSI_IaJv<KFG!GGPYuQFvUvKgBEgIBDbk~QgXpB$f;58SqEh~
zo0%CQ*5y)rC3!P@WOU|wme5-vv*=i57TlATNW53h0CK0|EEQ*RR^{kPdwS@28WR)F
zG_}SHGe!luYPc7uTs|igMlJzzMNP<iiriicw>W0ga(ye^E>mCbou{4aRMEMHm1N$f
z<sH~eNk)6Ey|!de%DaJmV;MWO-?>6&ox9G`nDeVX%i5funVPV3d3h5fv&5;NFWP1|
zWP|edW^Kv*`oyxXxXj+2$x3f>A+#l?X3BXv>tx5p=!3R%P7%Q;#%y~_-svtaw;p5m
zWmmY(t&6ggG8-oC#ICJ7w(q<(p14uo6umYPO$^@c{O}snbd}tb%2w>Gxp_$2?%N-I
zkCn*6(uDJtCpdHE)%NgPAnD-f{*`8&dx}YUyK&GSpOTp%J2%+F9~$4L*C%3IZ@Fpv
zu3g)2-nr|T1O4GDtE}Dmye+C6w9Va!=GrdjC2wWf2U$qvZ1?8I-Y@4l$#tr12iKNf
zC4;$iY?Za*r6*mtUiRDkref(U%ga@X3twGcUXZvb@f#Au-R0$&#Nd(T<)%dQI@=A_
ziaTzxR#f~eUi#t}`RXfP<Tw0f@_*;a<>gmu9TSdS-?A>2b*6qqw%w6vt@huzI`En`
zryuZDtyiCa-DOu@>~v$=%a8Z1zSNVSmJeP*S^72kKPh88VCbgR{(Dz$S{=Ciq%EuM
zm627e16x-6H?8(<I^DWnR@dbJk4YZ~%nskM+W(G~H>?i4{iGXK+lNoSVRi6#PT90N
zbnmH~R)_C?(U#T8lkAgMXWqEFaNFw4rqxMlF?x+MFv`Fv1EUQ5lo=>I;&u?yi0<$?
zyFkk&wtj96>rek2mfU!muD?R#H_RUr$m?1yM>OvJlFNVA|8CRmV}EYe@@*Qk8Y>#_
z)%Z@0@7MU38Xwa55sjbJ__)S@(fG68&mVVtPwO~W=yT<R`W*RFwY*y6xf)-g@fS62
z`;5EA%e8#9##d{6jmBFv-lj37F{2Tmf84L--_-avjlZYyeH#BnV?*PIG(M{FF^yl;
z_=Lu9Yh2dI`8rMa|16CcYUFx9FXeyG<9n6n;d+fxjk`4N(RjPY7ixaqqWf3V_;!u&
z)%fQcAJBNFdiJ1}n;IX{_$iHF*4WedtVW;a->>mJjTdPg(-_vcS>sNPagBR4?$cP-
zc$dbvX?%~yKhpTe8t>Qmutpr;N3?uUKi~RkEq_5H<~2T-jL#wWb^S_hce+M=4j28O
zXc5;%d|n)%569=h@wr>%1)uN6=eapg_<S}#kB!e?<MY<|d^J8#jn7Zx^V0ZyG(HcF
z&p#v2_<S=y&y3G6<MYb*>2diRjX&=3=xDnaN<NypKaXnstj7CvzCNzy<K^iIUH{~d
zsCT^fFEsv?=HZ3n$4`5FzB9^)Cxy?9^5MzHsrtTaG+*4b_(%Eh_)*gL-OraF_}(IX
zPZ7SC2;W15?;XPT4B>l)@I6BK-XMHWkZ17EL*Wh2vmeiOJ|=X$IgQVCO5S{e=Ht1}
z1Nysnln<oBiTL>(&sR^SpQ8ePe)%r-<h{?+ygX4-`u-DazC7P~y?>MsPl_KN<-?PY
zzdyn9@y|M5&rUptg(pw2d_0G{e$1|()_maSg3oA~e?G`RN5s!hPx-vt?HY~8+{g(8
zH|y`C&e#39;03Z@Kj|U3MEC!>^5az}cs|!Z$N9j|-G9>K^B>(V{2b@wKYD+D+U;Mj
z`8ZL2{_#2;@2_e6Y4@MoXGZz(q~y*MEFYf1KM#d>pJ4fTzURxr36_uNK?zTn50CQU
z$;W3-uzc`*IilnLuEvwwp56Pg3ukNl6XkmQWAEUP*<fsx4^PInjq>5iM{<-8Pd>_;
z5By#bzjvrAKcvyq=mlJO_s9>=(S2Z)4^KWG9p%H5kFSsN;mODMNBQvN!~aiiaddy`
z=HWu*L+}5CTE^dHU#{h_Mo%(dfQ2vU-%D-L{pH`Y;O~0z_tyA(ZTvj`g_e@-nuil*
zzC7Q5FLkTtBl&YOAAXe2Z_+&Ayz!&zztGp`H6L%*Skw4UjqlNTpT-9?eo!Nx<NBzU
zPxO3!N%!L^jX!vS?AK3vTu%Rj+ww#?pPsAZ{Ym%tN42|1`@c-%)f%tU_*#uWs-G9i
z`rW#}qt6q%M>L`N7@beueC*SFoG9lHz8}NS_nxcydf)`j$8)v++|S)7SU!0C&vp47
zqkMQ$`Uj(Yc=E9@%7-T(_l)x4$;Ssq`S9f9BcptH^3fXQ!;_E4NBQvN<H#r<o_u_F
zln+lnR*pWO?a9X(qkMSs5g6selaGr>`S9dp!zdq~d~6!!!;_DlqkMSsaoZ>#o_u6R
z`S9f9j!`~5`M7tK4^KXRca#rLKHfLVhbJF@I?9J9AAdc{hbJE&9p%H5k56ho5~J@w
zb@KAK=HuHMJ<0kxEa30qmNg%%^mzd<(fD$Wm;47kN8o3DNJ6@wC(7S{Jm2}bZj=vC
zinnP#@crJmXc^D%c)gbS@7g_`ynqU+=h=^+_4lv;w1;L&^D(3G9*u`I{<Qs{Xzkyj
z`~5=w{KkoPJpO+h?+P8zeHvek0XE%*Gql{-=Ovw|<<=AK!b`P`XXBl(<+iSW8Lj{B
zv&(sd=1y`~n8)Fn*Ss#A>K3i0uGhyZTDA`KbX_0Nc#cK{ZDw_Eb{nkJ6=7|Eww7O{
z@fS2!?spf!!@7X|iu{?o{xq$R?f+oe-5<eD)n~Qb)g4*ZGV<Kha#Pnot>v?@VeS8C
z9XdB*gx)P*-E_-0YJ9!MNsaq8-mCHVHU5dl4`}>^#?NW|y2kHmwDeVox#QS%^XAuX
z-ED2#y!l!?xJ@4KE6<y}@~W|QV^<~$>q6_+%cD|5cHzqPmnyXZ(a~|D>HTEu+snLq
zwSbk@YODH)yXJsaFvod?b&l0hpF7IO9Dbfv(eVx+zGL;j&Z&Q{#d*HY>F-P{{<wCm
zVW(3+Fz^^_CuHF)6o%qkkHg<}9Nst%|DVEb`JZ3cNaAekJS(I7i}goj;Y{m%tF9dL
z<==$+rT(2%|7qdJ8vl!>|Fc%Ev2dP2|7RJF!cbg$9RAC~&31X5TZG#~?XkE^_?auu
zv)~<g(s1d?YPsZOU6B{=m;Ifi{n{w}Eb*uQ3AZAiL0WeDImc>#&E<B+jdy83RqE%k
z_T&GsThDo3s4TwI;Vv8g_s@^RKX@Ge$>Z>^2se4)@qF_*^<N|>R(p6Lx_qJVV;!&c
z$Kl(Bzw#LUOqlvB<!s;du6&bl;~T~a-{b-GxlB4G527xOja%}3>zRT)nYJQNIF={;
zrlu05>_oym#k=f0V7N3EvJzX~uyfN*+c#T@#I~KcCbmWsH{P=8rmczSmRqdEwjDR$
zuxUr)<{NL^wRLx5_of?mY)weT@$$h!TKaK1mnR>ON4D+Qe#7R(y0NRQ$znP^D6SjZ
zAl0X`d*so}<&->Vd^(lQ4c1<@URI`y#a!O>A<zBIA6@6P+`jv!gzQRsZ?+`QO5c6c
z<|%nr^zPK2skHN?<y>kyZ6(qZsdCDibRO|L=-O#+9_lPPC{Crl{Z1BU%14Lnj?r}D
zj#0+6SDqHG12(%mQOYMW@~~*xY-0P(vSK2eGf(TDkd5ai<N@E~&hv<6=L%ALPe~qA
zY<5<j8ZA#iPbB2w@wxJ3Vmz~NFubeA*2|;O<q6CKVn&g!WXm2f={#TBgAxfzLt^XB
zEeSKS$>a2$Fqu1M-Xt9pGX+nVg5)6NiykNkxssBHp660i**oZf8kr=oa%8-1tg+I;
z>2hk1#B$M%87xT((#3)`CXb#@kL}IPjLD<2#mw?SMVoHeet9{y*K*cnQl*SFHgPZ~
z{kgGRbl2<`u|*S4AtCFE>8X@ypi70RvNh%mb4(V;_U5H%9>8vmIZlif^NuuQ=}cm>
zC>A9$6VeGvZqr1nSWL<AObaZVR%Tn$fiteubaq_Ek}sRh2r(v(i2&Bvcz$|XF4x{!
z`;(W<{iU3O<l5}M0(ygoi2F-j?bOAxi5JQL=Gu(*VQr5Xk#)nb!TCht)e?u>SF}B%
zJ=|E9ah(9(A`9P>dpvA^PTL~}wIc3=dHd!*z})kqJ?@JUJ0>~uGPjdv5$$n*`g&nz
zlW33oY(!hzOBLruMeB`1%{?sI<GvM<$B%l5`=q_OCq^0f#fUL?vp+O`#nv6NU~-D~
zxGzV<eL3e3`+twNzd<|1eLf=I7nt^@4d?Ie!c0!|cHJ6WkD3DIh4&9`>4%U1tgIMr
zZ#UhVh#j{$_*d>YoEPG|r8L~$`mk2ei18{Nj34|DsXgv15T6|EnA-!ki~jx#wa5J#
zBHj<7KKuWt+8)Ob_fv@R)oAHPj2qOvEi?O%_P9?*T#t$xdHWAj`?}s|Blf91Z~swQ
zZ-%JT*24Q<M6QoAqWQJYnxXc1UyO))QpI_pjQACyX8*DM!TXRW-1=5lqXyesw7w-}
zbBG7+^?e@RhjdKm@`47~E<Bi%b;H|l>icTMpws&3KWLBm@6;ae%Mq(bn>tHqhiEoF
zynWo)Bl7$kw9|f1mBxl)|8d`l_a!(_U>G(QG@h>84-rTEtB9Min%8iIyY3qI-$lLe
v@9Dr^F4)W^j347N3}bg&y8kZhcDL{r>Kxle-7}BVe(kTiHP5C7RJ8stqpf*V

literal 0
HcmV?d00001


From 57b314752ec0ad42685bc78b376326f1f4c04669 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Jan 2025 19:19:37 +0000
Subject: [PATCH 278/368] debugfs: Fix the missing initializations in
 __debugfs_file_get()

Both method table pointers in debugfs_fsdata need to be initialized,
obviously, and calculating the bitmap of present methods would also
go better if we start with an initialized state.

Fixes: 41a0ecc0997c ("debugfs: get rid of dynamically allocation proxy_ops")
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lore.kernel.org/r/20250129191937.GR1977892@ZenIV
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/file.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index e33cc77699cd5..69e9ddcb113df 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -94,6 +94,7 @@ static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode)
 		fsd = d_fsd;
 	} else {
 		struct inode *inode = dentry->d_inode;
+		unsigned int methods = 0;
 
 		if (WARN_ON(mode == DBGFS_GET_ALREADY))
 			return -EINVAL;
@@ -106,25 +107,28 @@ static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode)
 			const struct debugfs_short_fops *ops;
 			ops = fsd->short_fops = DEBUGFS_I(inode)->short_fops;
 			if (ops->llseek)
-				fsd->methods |= HAS_LSEEK;
+				methods |= HAS_LSEEK;
 			if (ops->read)
-				fsd->methods |= HAS_READ;
+				methods |= HAS_READ;
 			if (ops->write)
-				fsd->methods |= HAS_WRITE;
+				methods |= HAS_WRITE;
+			fsd->real_fops = NULL;
 		} else {
 			const struct file_operations *ops;
 			ops = fsd->real_fops = DEBUGFS_I(inode)->real_fops;
 			if (ops->llseek)
-				fsd->methods |= HAS_LSEEK;
+				methods |= HAS_LSEEK;
 			if (ops->read)
-				fsd->methods |= HAS_READ;
+				methods |= HAS_READ;
 			if (ops->write)
-				fsd->methods |= HAS_WRITE;
+				methods |= HAS_WRITE;
 			if (ops->unlocked_ioctl)
-				fsd->methods |= HAS_IOCTL;
+				methods |= HAS_IOCTL;
 			if (ops->poll)
-				fsd->methods |= HAS_POLL;
+				methods |= HAS_POLL;
+			fsd->short_fops = NULL;
 		}
+		fsd->methods = methods;
 		refcount_set(&fsd->active_users, 1);
 		init_completion(&fsd->active_users_drained);
 		INIT_LIST_HEAD(&fsd->cancellations);

From 8c2fa44132e8cd1b05c77a705adb8d1f5a5daf3f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 30 Jan 2025 13:32:59 +0100
Subject: [PATCH 279/368] ALSA: hda/realtek: Workaround for resume on Dell
 Venue 11 Pro 7130

It was reported that the headphone output on Dell Venue 11 Pro 7130
becomes mono after PM resume.  The cause seems to be the BIOS setting
up the codec COEF 0x0d bit 0x40 wrongly for some reason, and restoring
the original value 0x2800 fixes the problem.

This patch adds the quirk entry to perform the COEF restore.

Cc: <stable@vger.kernel.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219697
Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1235686
Link: https://patch.msgid.link/20250130123301.8996-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d36a79a8ecbf8..8192be394d0d0 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7497,6 +7497,16 @@ static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec,
 	spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook;
 }
 
+/*
+ * Clear COEF 0x0d (PCBEEP passthrough) bit 0x40 where BIOS sets it wrongly
+ * at PM resume
+ */
+static void alc283_fixup_dell_hp_resume(struct hda_codec *codec,
+					const struct hda_fixup *fix, int action)
+{
+	if (action == HDA_FIXUP_ACT_INIT)
+		alc_write_coef_idx(codec, 0xd, 0x2800);
+}
 
 enum {
 	ALC269_FIXUP_GPIO2,
@@ -7799,6 +7809,7 @@ enum {
 	ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE,
 	ALC233_FIXUP_MEDION_MTL_SPK,
 	ALC294_FIXUP_BASS_SPEAKER_15,
+	ALC283_FIXUP_DELL_HP_RESUME,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -10143,6 +10154,10 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc294_fixup_bass_speaker_15,
 	},
+	[ALC283_FIXUP_DELL_HP_RESUME] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc283_fixup_dell_hp_resume,
+	},
 };
 
 static const struct hda_quirk alc269_fixup_tbl[] = {
@@ -10203,6 +10218,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1028, 0x0604, "Dell Venue 11 Pro 7130", ALC283_FIXUP_DELL_HP_RESUME),
 	SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x062c, "Dell Latitude E5550", ALC292_FIXUP_DELL_E7X),

From 8c8492ca64e79c6e0f433e8c9d2bcbd039ef83d0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 30 Jan 2025 08:40:29 -0700
Subject: [PATCH 280/368] io_uring/net: don't retry connect operation on
 EPOLLERR

If a socket is shutdown before the connection completes, POLLERR is set
in the poll mask. However, connect ignores this as it doesn't know, and
attempts the connection again. This may lead to a bogus -ETIMEDOUT
result, where it should have noticed the POLLERR and just returned
-ECONNRESET instead.

Have the poll logic check for whether or not POLLERR is set in the mask,
and if so, mark the request as failed. Then connect can appropriately
fail the request rather than retry it.

Reported-by: Sergey Galas <ssgalas@cloud.ru>
Cc: stable@vger.kernel.org
Link: https://github.com/axboe/liburing/discussions/1335
Fixes: 3fb1bd688172 ("io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c  | 5 +++++
 io_uring/poll.c | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/io_uring/net.c b/io_uring/net.c
index d89c39f853e39..17852a6616ffe 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1710,6 +1710,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
 	int ret;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
+	if (unlikely(req->flags & REQ_F_FAIL)) {
+		ret = -ECONNRESET;
+		goto out;
+	}
+
 	file_flags = force_nonblock ? O_NONBLOCK : 0;
 
 	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 31b118133bb04..bb1c0cd4f809a 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -273,6 +273,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
 				return IOU_POLL_REISSUE;
 			}
 		}
+		if (unlikely(req->cqe.res & EPOLLERR))
+			req_set_fail(req);
 		if (req->apoll_events & EPOLLONESHOT)
 			return IOU_POLL_DONE;
 

From ad9f265c7328d9d73a9d1edbd52f4415cc764296 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 27 Jan 2025 11:10:26 -0800
Subject: [PATCH 281/368] stackinit: Add old-style zero-init syntax to struct
 tests

The deprecated way to do a full zero init of a structure is with "= { 0 }",
but we weren't testing this style. Add it.

Link: https://lore.kernel.org/r/20250127191031.245214-1-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 lib/stackinit_kunit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c
index c40818ec9c180..7cc9af181e898 100644
--- a/lib/stackinit_kunit.c
+++ b/lib/stackinit_kunit.c
@@ -101,6 +101,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
 
 #define INIT_STRUCT_none(var_type)	/**/
 #define INIT_STRUCT_zero(var_type)	= { }
+#define INIT_STRUCT_old_zero(var_type)	= { 0 }
 
 
 #define __static_partial		{ .two = 0, }
@@ -346,6 +347,7 @@ struct test_user {
 /* These should be fully initialized all the time! */
 DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS);
 DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS);
+DEFINE_STRUCT_TESTS(old_zero, ALWAYS_PASS);
 /* Struct initializers: padding may be left uninitialized. */
 DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS);
 DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS);
@@ -440,6 +442,7 @@ static struct kunit_case stackinit_test_cases[] = {
 	/* These are explicitly initialized and should always pass. */
 	KUNIT_test_scalars(zero),
 	KUNIT_test_structs(zero),
+	KUNIT_test_structs(old_zero),
 	/* Padding here appears to be accidentally always initialized? */
 	KUNIT_test_structs(dynamic_partial),
 	KUNIT_test_structs(assigned_dynamic_partial),

From e71a29db79da194678630ebfcc53ff2aecc9d441 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 27 Jan 2025 11:10:27 -0800
Subject: [PATCH 282/368] stackinit: Add union initialization to selftests

The stack initialization selftests were checking scalars, strings,
and structs, but not unions. Add union tests (which are mostly identical
setup to structs). This catches the recent union initialization behavioral
changes seen in GCC 15. Before GCC 15, this new test passes:

    ok 18 test_small_start_old_zero

With GCC 15, it fails:

    not ok 18 test_small_start_old_zero

Specifically, a union with a larger member where a smaller member is
initialized with the older "= { 0 }" syntax:

union test_small_start {
     char one:1;
     char two;
     short three;
     unsigned long four;
     struct big_struct {
             unsigned long array[8];
     } big;
};

This is a regression in compiler behavior that Linux has depended on.
GCC does not seem likely to fix it, instead suggesting that affected
projects start using -fzero-init-padding-bits=unions:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118403

Link: https://lore.kernel.org/r/20250127191031.245214-2-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 lib/stackinit_kunit.c | 103 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c
index 7cc9af181e898..fbe910c9c8253 100644
--- a/lib/stackinit_kunit.c
+++ b/lib/stackinit_kunit.c
@@ -47,10 +47,12 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
 #define DO_NOTHING_TYPE_SCALAR(var_type)	var_type
 #define DO_NOTHING_TYPE_STRING(var_type)	void
 #define DO_NOTHING_TYPE_STRUCT(var_type)	void
+#define DO_NOTHING_TYPE_UNION(var_type)		void
 
 #define DO_NOTHING_RETURN_SCALAR(ptr)		*(ptr)
 #define DO_NOTHING_RETURN_STRING(ptr)		/**/
 #define DO_NOTHING_RETURN_STRUCT(ptr)		/**/
+#define DO_NOTHING_RETURN_UNION(ptr)		/**/
 
 #define DO_NOTHING_CALL_SCALAR(var, name)			\
 		(var) = do_nothing_ ## name(&(var))
@@ -58,10 +60,13 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
 		do_nothing_ ## name(var)
 #define DO_NOTHING_CALL_STRUCT(var, name)			\
 		do_nothing_ ## name(&(var))
+#define DO_NOTHING_CALL_UNION(var, name)			\
+		do_nothing_ ## name(&(var))
 
 #define FETCH_ARG_SCALAR(var)		&var
 #define FETCH_ARG_STRING(var)		var
 #define FETCH_ARG_STRUCT(var)		&var
+#define FETCH_ARG_UNION(var)		&var
 
 /*
  * On m68k, if the leaf function test variable is longer than 8 bytes,
@@ -77,6 +82,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
 #define INIT_CLONE_SCALAR		/**/
 #define INIT_CLONE_STRING		[FILL_SIZE_STRING]
 #define INIT_CLONE_STRUCT		/**/
+#define INIT_CLONE_UNION		/**/
 
 #define ZERO_CLONE_SCALAR(zero)		memset(&(zero), 0x00, sizeof(zero))
 #define ZERO_CLONE_STRING(zero)		memset(&(zero), 0x00, sizeof(zero))
@@ -92,6 +98,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
 		zero.three = 0;				\
 		zero.four = 0;				\
 	} while (0)
+#define ZERO_CLONE_UNION(zero)		ZERO_CLONE_STRUCT(zero)
 
 #define INIT_SCALAR_none(var_type)	/**/
 #define INIT_SCALAR_zero(var_type)	= 0
@@ -147,6 +154,34 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
 #define INIT_STRUCT_assigned_copy(var_type)				\
 					; var = *(arg)
 
+/* Union initialization is the same as structs. */
+#define INIT_UNION_none(var_type)	INIT_STRUCT_none(var_type)
+#define INIT_UNION_zero(var_type)	INIT_STRUCT_zero(var_type)
+#define INIT_UNION_old_zero(var_type)	INIT_STRUCT_old_zero(var_type)
+
+#define INIT_UNION_static_partial(var_type)		\
+	INIT_STRUCT_static_partial(var_type)
+#define INIT_UNION_static_all(var_type)			\
+	INIT_STRUCT_static_all(var_type)
+#define INIT_UNION_dynamic_partial(var_type)		\
+	INIT_STRUCT_dynamic_partial(var_type)
+#define INIT_UNION_dynamic_all(var_type)		\
+	INIT_STRUCT_dynamic_all(var_type)
+#define INIT_UNION_runtime_partial(var_type)		\
+	INIT_STRUCT_runtime_partial(var_type)
+#define INIT_UNION_runtime_all(var_type)		\
+	INIT_STRUCT_runtime_all(var_type)
+#define INIT_UNION_assigned_static_partial(var_type)	\
+	INIT_STRUCT_assigned_static_partial(var_type)
+#define INIT_UNION_assigned_static_all(var_type)	\
+	INIT_STRUCT_assigned_static_all(var_type)
+#define INIT_UNION_assigned_dynamic_partial(var_type)	\
+	INIT_STRUCT_assigned_dynamic_partial(var_type)
+#define INIT_UNION_assigned_dynamic_all(var_type)	\
+	INIT_STRUCT_assigned_dynamic_all(var_type)
+#define INIT_UNION_assigned_copy(var_type)		\
+	INIT_STRUCT_assigned_copy(var_type)
+
 /*
  * @name: unique string name for the test
  * @var_type: type to be tested for zeroing initialization
@@ -295,6 +330,33 @@ struct test_user {
 	unsigned long four;
 };
 
+/* No padding: all members are the same size. */
+union test_same_sizes {
+	unsigned long one;
+	unsigned long two;
+	unsigned long three;
+	unsigned long four;
+};
+
+/* Mismatched sizes, with one and two being small */
+union test_small_start {
+	char one:1;
+	char two;
+	short three;
+	unsigned long four;
+	struct big_struct {
+		unsigned long array[8];
+	} big;
+};
+
+/* Mismatched sizes, with three and four being small */
+union test_small_end {
+	short one;
+	unsigned long two;
+	char three:1;
+	char four;
+};
+
 #define ALWAYS_PASS	WANT_SUCCESS
 #define ALWAYS_FAIL	XFAIL
 
@@ -333,6 +395,11 @@ struct test_user {
 			    struct test_ ## name, STRUCT, init, \
 			    xfail)
 
+#define DEFINE_UNION_TEST(name, init, xfail)			\
+		DEFINE_TEST(name ## _ ## init,			\
+			    union test_ ## name, STRUCT, init,	\
+			    xfail)
+
 #define DEFINE_STRUCT_TESTS(init, xfail)			\
 		DEFINE_STRUCT_TEST(small_hole, init, xfail);	\
 		DEFINE_STRUCT_TEST(big_hole, init, xfail);	\
@@ -344,10 +411,22 @@ struct test_user {
 				    xfail);			\
 		DEFINE_STRUCT_TESTS(base ## _ ## all, xfail)
 
+#define DEFINE_UNION_INITIALIZER_TESTS(base, xfail)		\
+		DEFINE_UNION_TESTS(base ## _ ## partial,	\
+				    xfail);			\
+		DEFINE_UNION_TESTS(base ## _ ## all, xfail)
+
+#define DEFINE_UNION_TESTS(init, xfail)				\
+		DEFINE_UNION_TEST(same_sizes, init, xfail);	\
+		DEFINE_UNION_TEST(small_start, init, xfail);	\
+		DEFINE_UNION_TEST(small_end, init, xfail);
+
 /* These should be fully initialized all the time! */
 DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS);
 DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS);
 DEFINE_STRUCT_TESTS(old_zero, ALWAYS_PASS);
+DEFINE_UNION_TESTS(zero, ALWAYS_PASS);
+DEFINE_UNION_TESTS(old_zero, ALWAYS_PASS);
 /* Struct initializers: padding may be left uninitialized. */
 DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS);
 DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS);
@@ -355,6 +434,12 @@ DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS);
 DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS);
 DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS);
 DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL);
+DEFINE_UNION_INITIALIZER_TESTS(static, STRONG_PASS);
+DEFINE_UNION_INITIALIZER_TESTS(dynamic, STRONG_PASS);
+DEFINE_UNION_INITIALIZER_TESTS(runtime, STRONG_PASS);
+DEFINE_UNION_INITIALIZER_TESTS(assigned_static, STRONG_PASS);
+DEFINE_UNION_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS);
+DEFINE_UNION_TESTS(assigned_copy, ALWAYS_FAIL);
 /* No initialization without compiler instrumentation. */
 DEFINE_SCALAR_TESTS(none, STRONG_PASS);
 DEFINE_STRUCT_TESTS(none, BYREF_PASS);
@@ -438,14 +523,23 @@ DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL);
 		KUNIT_CASE(test_trailing_hole_ ## init),\
 		KUNIT_CASE(test_packed_ ## init)	\
 
+#define KUNIT_test_unions(init)				\
+		KUNIT_CASE(test_same_sizes_ ## init),	\
+		KUNIT_CASE(test_small_start_ ## init),	\
+		KUNIT_CASE(test_small_end_ ## init)	\
+
 static struct kunit_case stackinit_test_cases[] = {
 	/* These are explicitly initialized and should always pass. */
 	KUNIT_test_scalars(zero),
 	KUNIT_test_structs(zero),
 	KUNIT_test_structs(old_zero),
+	KUNIT_test_unions(zero),
+	KUNIT_test_unions(old_zero),
 	/* Padding here appears to be accidentally always initialized? */
 	KUNIT_test_structs(dynamic_partial),
 	KUNIT_test_structs(assigned_dynamic_partial),
+	KUNIT_test_unions(dynamic_partial),
+	KUNIT_test_unions(assigned_dynamic_partial),
 	/* Padding initialization depends on compiler behaviors. */
 	KUNIT_test_structs(static_partial),
 	KUNIT_test_structs(static_all),
@@ -455,8 +549,17 @@ static struct kunit_case stackinit_test_cases[] = {
 	KUNIT_test_structs(assigned_static_partial),
 	KUNIT_test_structs(assigned_static_all),
 	KUNIT_test_structs(assigned_dynamic_all),
+	KUNIT_test_unions(static_partial),
+	KUNIT_test_unions(static_all),
+	KUNIT_test_unions(dynamic_all),
+	KUNIT_test_unions(runtime_partial),
+	KUNIT_test_unions(runtime_all),
+	KUNIT_test_unions(assigned_static_partial),
+	KUNIT_test_unions(assigned_static_all),
+	KUNIT_test_unions(assigned_dynamic_all),
 	/* Everything fails this since it effectively performs a memcpy(). */
 	KUNIT_test_structs(assigned_copy),
+	KUNIT_test_unions(assigned_copy),
 	/* STRUCTLEAK_BYREF_ALL should cover everything from here down. */
 	KUNIT_test_scalars(none),
 	KUNIT_CASE(test_switch_1_none),

From dce4aab8441d285b9a78b33753e0bf583c1320ee Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 27 Jan 2025 11:10:28 -0800
Subject: [PATCH 283/368] kbuild: Use -fzero-init-padding-bits=all

GCC 15 introduces a regression in "= { 0 }" style initialization of
unions that Linux has depended on for eliminating uninitialized variable
contents. GCC does not seem likely to fix it[1], instead suggesting[2]
that affected projects start using -fzero-init-padding-bits=unions.

To avoid future surprises beyond just the current situation with unions,
enable -fzero-init-padding-bits=all when available (GCC 15+). This will
correctly zero padding bits in unions and structs that might have been
left uninitialized, and will make sure there is no immediate regression
in union initializations. As seen in the stackinit KUnit selftest, these
union cases, which were passing before, were failing under GCC 15:

    not ok 18 test_small_start_old_zero
    ok 29 test_small_start_dynamic_partial # SKIP XFAIL uninit bytes: 63
    ok 32 test_small_start_assigned_dynamic_partial # SKIP XFAIL uninit bytes: 63
    ok 67 test_small_start_static_partial # SKIP XFAIL uninit bytes: 63
    ok 70 test_small_start_static_all # SKIP XFAIL uninit bytes: 56
    ok 73 test_small_start_dynamic_all # SKIP XFAIL uninit bytes: 56
    ok 82 test_small_start_assigned_static_partial # SKIP XFAIL uninit bytes: 63
    ok 85 test_small_start_assigned_static_all # SKIP XFAIL uninit bytes: 56
    ok 88 test_small_start_assigned_dynamic_all # SKIP XFAIL uninit bytes: 56

The above all now pass again with -fzero-init-padding-bits=all added.

This also fixes the following cases for struct initialization that had
been XFAIL until now because there was no compiler support beyond the
larger "-ftrivial-auto-var-init=zero" option:

    ok 38 test_small_hole_static_all # SKIP XFAIL uninit bytes: 3
    ok 39 test_big_hole_static_all # SKIP XFAIL uninit bytes: 124
    ok 40 test_trailing_hole_static_all # SKIP XFAIL uninit bytes: 7
    ok 42 test_small_hole_dynamic_all # SKIP XFAIL uninit bytes: 3
    ok 43 test_big_hole_dynamic_all # SKIP XFAIL uninit bytes: 124
    ok 44 test_trailing_hole_dynamic_all # SKIP XFAIL uninit bytes: 7
    ok 58 test_small_hole_assigned_static_all # SKIP XFAIL uninit bytes: 3
    ok 59 test_big_hole_assigned_static_all # SKIP XFAIL uninit bytes: 124
    ok 60 test_trailing_hole_assigned_static_all # SKIP XFAIL uninit bytes: 7
    ok 62 test_small_hole_assigned_dynamic_all # SKIP XFAIL uninit bytes: 3
    ok 63 test_big_hole_assigned_dynamic_all # SKIP XFAIL uninit bytes: 124
    ok 64 test_trailing_hole_assigned_dynamic_all # SKIP XFAIL uninit bytes: 7

All of the above now pass when built under GCC 15. Tests can be seen
with:

    ./tools/testing/kunit/kunit.py run stackinit --arch=x86_64 \
        --make_option CC=gcc-15

Clang continues to fully initialize these kinds of variables[3] without
additional flags.

Suggested-by: Jakub Jelinek <jakub@redhat.com>
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118403 [1]
Link: https://lore.kernel.org/linux-toolchains/Z0hRrrNU3Q+ro2T7@tucnak/ [2]
Link: https://github.com/llvm/llvm-project/commit/7a086e1b2dc05f54afae3591614feede727601fa [3]
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20250127191031.245214-3-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 scripts/Makefile.extrawarn | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 1d13cecc7cc78..eb719f6d8d536 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -77,6 +77,9 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
 # Warn if there is an enum types mismatch
 KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
 
+# Explicitly clear padding bits during variable initialization
+KBUILD_CFLAGS += $(call cc-option,-fzero-init-padding-bits=all)
+
 KBUILD_CFLAGS += -Wextra
 KBUILD_CFLAGS += -Wunused
 

From 8a20030038742b9915c6d811a4e6c14b126cafb4 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 3 Dec 2024 17:17:34 -0500
Subject: [PATCH 284/368] hexagon: fix using plain integer as NULL pointer
 warning in cmpxchg

Sparse reports

    net/ipv4/inet_diag.c:1511:17: sparse: sparse: Using plain integer as NULL pointer

This is due to this code calling cmpxchg on a non-integer type,
struct inet_diag_handler *:

    return !cmpxchg((const struct inet_diag_handler**)&inet_diag_table[type],
                    NULL, h) ? 0 : -EEXIST;

Meanwhile, hexagon's cmpxchg assigns an integer value to a variable of
this type:

    __typeof__(*(ptr)) __oldval = 0;

Update this assignment to cast 0 to the correct type.

The original issue is easily reproduced at head with the below block,
and is absent after this change.

    make LLVM=1 ARCH=hexagon defconfig
    make C=1 LLVM=1 ARCH=hexagon net/ipv4/inet_diag.o

Fixes: 99a70aa051d2 ("Hexagon: Add processor and system headers")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202411091538.PGSTqUBi-lkp@intel.com/
Signed-off-by: Willem de Bruijn <willemb@google.com>
Tested-by: Christian Gmeiner <cgmeiner@igalia.com>
Link: https://lore.kernel.org/r/20241203221736.282020-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Brian Cain <bcain@quicinc.com>
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 arch/hexagon/include/asm/cmpxchg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index bf6cf5579cf45..9c58fb81f7fd6 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -56,7 +56,7 @@ __arch_xchg(unsigned long x, volatile void *ptr, int size)
 	__typeof__(ptr) __ptr = (ptr);				\
 	__typeof__(*(ptr)) __old = (old);			\
 	__typeof__(*(ptr)) __new = (new);			\
-	__typeof__(*(ptr)) __oldval = 0;			\
+	__typeof__(*(ptr)) __oldval = (__typeof__(*(ptr))) 0;	\
 								\
 	asm volatile(						\
 		"1:	%0 = memw_locked(%1);\n"		\

From e1e481edf9718222eeb285c41c1837c1c6b0afbd Mon Sep 17 00:00:00 2001
From: Hardevsinh Palaniya <hardevsinh.palaniya@siliconsignals.io>
Date: Mon, 11 Nov 2024 19:54:10 +0530
Subject: [PATCH 285/368] hexagon: time: Remove redundant null check for
 resource

Null check for 'resource' before assignment is unnecessary because the
variable 'resource' is initialized to NULL at the beginning of the function.

Signed-off-by: Hardevsinh Palaniya <hardevsinh.palaniya@siliconsignals.io>
Link: https://lore.kernel.org/r/20241111142458.67854-1-hardevsinh.palaniya@siliconsignals.io
Signed-off-by: Brian Cain <bcain@quicinc.com>
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 arch/hexagon/kernel/time.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
index f0f207e2a6947..6f851e1cd4ee0 100644
--- a/arch/hexagon/kernel/time.c
+++ b/arch/hexagon/kernel/time.c
@@ -170,8 +170,7 @@ static void __init time_init_deferred(void)
 
 	ce_dev->cpumask = cpu_all_mask;
 
-	if (!resource)
-		resource = rtos_timer_device.resource;
+	resource = rtos_timer_device.resource;
 
 	/*  ioremap here means this has to run later, after paging init  */
 	rtos_timer = ioremap(resource->start, resource_size(resource));

From e882d6f72caa9fca7b615c7bc88998717552e05e Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 2 May 2024 19:38:18 +0200
Subject: [PATCH 286/368] hexagon: Move kernel prototypes out of
 uapi/asm/setup.h header

The kernel function prototypes are of no use for userspace and
shouldn't get exposed in an uapi header, so let's move them into
an internal header instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Link: https://lore.kernel.org/r/20240502173818.58152-1-thuth@redhat.com
Signed-off-by: Brian Cain <bcain@quicinc.com>
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 arch/hexagon/include/asm/setup.h      | 20 ++++++++++++++++++++
 arch/hexagon/include/uapi/asm/setup.h | 14 ++------------
 2 files changed, 22 insertions(+), 12 deletions(-)
 create mode 100644 arch/hexagon/include/asm/setup.h

diff --git a/arch/hexagon/include/asm/setup.h b/arch/hexagon/include/asm/setup.h
new file mode 100644
index 0000000000000..9f2749cd4052d
--- /dev/null
+++ b/arch/hexagon/include/asm/setup.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_HEXAGON_SETUP_H
+#define _ASM_HEXAGON_SETUP_H
+
+#include <linux/init.h>
+#include <uapi/asm/setup.h>
+
+extern char external_cmdline_buffer;
+
+void __init setup_arch_memory(void);
+
+#endif
diff --git a/arch/hexagon/include/uapi/asm/setup.h b/arch/hexagon/include/uapi/asm/setup.h
index 8ce9428b15832..598f74f671f65 100644
--- a/arch/hexagon/include/uapi/asm/setup.h
+++ b/arch/hexagon/include/uapi/asm/setup.h
@@ -17,19 +17,9 @@
  * 02110-1301, USA.
  */
 
-#ifndef _ASM_SETUP_H
-#define _ASM_SETUP_H
-
-#ifdef __KERNEL__
-#include <linux/init.h>
-#else
-#define __init
-#endif
+#ifndef _UAPI_ASM_HEXAGON_SETUP_H
+#define _UAPI_ASM_HEXAGON_SETUP_H
 
 #include <asm-generic/setup.h>
 
-extern char external_cmdline_buffer;
-
-void __init setup_arch_memory(void);
-
 #endif

From e8265a947b0267950a2b74e5a4f118e6764540e8 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 8 Feb 2023 09:11:05 +0800
Subject: [PATCH 287/368] hexagon: Fix warning comparing pointer to 0

./arch/hexagon/kernel/traps.c:138:6-7: WARNING comparing pointer to 0

Avoid pointer type value compared with 0 to make code clear.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3978
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230208011105.80219-1-yang.lee@linux.alibaba.com
Signed-off-by: Brian Cain <bcain@quicinc.com>
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 arch/hexagon/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 75e062722d285..73bddcfa8ca6b 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -135,7 +135,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp,
 		}
 
 		/* Attempt to continue past exception. */
-		if (0 == newfp) {
+		if (!newfp) {
 			struct pt_regs *regs = (struct pt_regs *) (((void *)fp)
 						+ 8);
 

From 03410e87563a122075c3721acc7d5510e41d8332 Mon Sep 17 00:00:00 2001
From: Lin Yujun <linyujun809@huawei.com>
Date: Mon, 22 May 2023 02:56:08 +0000
Subject: [PATCH 288/368] hexagon: Fix unbalanced spinlock in die()

die() executes while holding the spinlock &die.lock and unlocks
it after printing the oops message.
However, if notify_die() returns NOTIFY_STOP, die() exits by
returning 1 without ever unlocking the spinlock.

Fix this by adding spin_unlock_irq(&die.lock) before returning.

Fixes: cf9750bae262 ("Hexagon: Provide basic debugging and system trap support.")
Signed-off-by: Lin Yujun <linyujun809@huawei.com>
Link: https://lore.kernel.org/r/20230522025608.2515558-1-linyujun809@huawei.com
Signed-off-by: Brian Cain <bcain@quicinc.com>
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 arch/hexagon/kernel/traps.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 73bddcfa8ca6b..e732aa01c2ff0 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -195,8 +195,10 @@ int die(const char *str, struct pt_regs *regs, long err)
 	printk(KERN_EMERG "Oops: %s[#%d]:\n", str, ++die.counter);
 
 	if (notify_die(DIE_OOPS, str, regs, err, pt_cause(regs), SIGSEGV) ==
-	    NOTIFY_STOP)
+	    NOTIFY_STOP) {
+		spin_unlock_irq(&die.lock);
 		return 1;
+	}
 
 	print_modules();
 	show_regs(regs);

From 84d78214b4f187da7e029f5dad344203511a04d7 Mon Sep 17 00:00:00 2001
From: Brian Cain <brian.cain@oss.qualcomm.com>
Date: Mon, 27 Jan 2025 12:51:03 -0800
Subject: [PATCH 289/368] MAINTAINERS: Update my email address

Qualcomm is migrating away from quicinc.com email addresses towards ones
with *.qualcomm.com.

Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Brian Cain <bcain@quicinc.com>
---
 .mailmap    | 2 ++
 MAINTAINERS | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 17dd8eb2630e6..1c007dec12d80 100644
--- a/.mailmap
+++ b/.mailmap
@@ -142,6 +142,8 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
 Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
 Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com>
 Brian Avery <b.avery@hp.com>
+Brian Cain <bcain@kernel.org> <brian.cain@oss.qualcomm.com>
+Brian Cain <bcain@kernel.org> <bcain@quicinc.com>
 Brian King <brking@us.ibm.com>
 Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
 Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index d49306cc17e34..4fcd5733b656d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19426,7 +19426,7 @@ F:	drivers/misc/fastrpc.c
 F:	include/uapi/misc/fastrpc.h
 
 QUALCOMM HEXAGON ARCHITECTURE
-M:	Brian Cain <bcain@quicinc.com>
+M:	Brian Cain <brian.cain@oss.qualcomm.com>
 L:	linux-hexagon@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bcain/linux.git

From ec918a11e63856999b31705e81226dd7dc043e20 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Thu, 30 Jan 2025 11:48:56 +0100
Subject: [PATCH 290/368] Revert "mips: fix shmctl/semctl/msgctl syscall for
 o32"

This reverts commit bc7584e009c39375294794f7ca751a6b2622c425.

The split IPC system calls for o32 were introduced with the modern
version only. Changing this breaks the ABI.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/kernel/syscalls/syscall_o32.tbl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index e8a57c2067580..349b8aad1159f 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -382,15 +382,15 @@
 368	o32	io_pgetevents			sys_io_pgetevents_time32	compat_sys_io_pgetevents
 # room for arch specific calls
 393	o32	semget				sys_semget
-394	o32	semctl				sys_old_semctl		compat_sys_old_semctl
+394	o32	semctl				sys_semctl			compat_sys_semctl
 395	o32	shmget				sys_shmget
-396	o32	shmctl				sys_old_shmctl		compat_sys_old_shmctl
+396	o32	shmctl				sys_shmctl			compat_sys_shmctl
 397	o32	shmat				sys_shmat			compat_sys_shmat
 398	o32	shmdt				sys_shmdt
 399	o32	msgget				sys_msgget
 400	o32	msgsnd				sys_msgsnd			compat_sys_msgsnd
 401	o32	msgrcv				sys_msgrcv			compat_sys_msgrcv
-402	o32	msgctl				sys_old_msgctl		compat_sys_old_msgctl
+402	o32	msgctl				sys_msgctl			compat_sys_msgctl
 403	o32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
 404	o32	clock_settime64			sys_clock_settime		sys_clock_settime
 405	o32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime

From bb2784d9ab49587ba4fbff37a319fff2924db289 Mon Sep 17 00:00:00 2001
From: Easwar Hariharan <eahariha@linux.microsoft.com>
Date: Thu, 30 Jan 2025 19:26:58 +0000
Subject: [PATCH 291/368] jiffies: Cast to unsigned long in secs_to_jiffies()
 conversion

While converting users of msecs_to_jiffies(), lkp reported that some range
checks would always be true because of the mismatch between the implied int
value of secs_to_jiffies() vs the unsigned long return value of the
msecs_to_jiffies() calls it was replacing.

Fix this by casting the secs_to_jiffies() input value to unsigned long.

Fixes: b35108a51cf7ba ("jiffies: Define secs_to_jiffies()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250130192701.99626-1-eahariha@linux.microsoft.com
Closes: https://lore.kernel.org/oe-kbuild-all/202501301334.NB6NszQR-lkp@intel.com/
---
 include/linux/jiffies.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index ed945f42e064a..0ea8c9887429f 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m)
  *
  * Return: jiffies value
  */
-#define secs_to_jiffies(_secs) ((_secs) * HZ)
+#define secs_to_jiffies(_secs) ((unsigned long)(_secs) * HZ)
 
 extern unsigned long __usecs_to_jiffies(const unsigned int u);
 #if !(USEC_PER_SEC % HZ)

From 1e1a9cecfab3f22ebef0a976f849c87be8d03c1c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 Jan 2025 13:03:47 +0100
Subject: [PATCH 292/368] block: force noio scope in blk_mq_freeze_queue

When block drivers or the core block code perform allocations with a
frozen queue, this could try to recurse into the block device to
reclaim memory and deadlock.  Thus all allocations done by a process
that froze a queue need to be done without __GFP_IO and __GFP_FS.
Instead of trying to track all of them down, force a noio scope as
part of freezing the queue.

Note that nvme is a bit of a mess here due to the non-owner freezes,
and they will be addressed separately.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250131120352.1315351-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c            | 10 ++++++----
 block/blk-iocost.c            | 14 ++++++++------
 block/blk-iolatency.c         |  6 ++++--
 block/blk-mq.c                | 21 +++++++++++++--------
 block/blk-pm.c                |  2 +-
 block/blk-rq-qos.c            | 12 +++++++-----
 block/blk-settings.c          |  5 +++--
 block/blk-sysfs.c             |  8 +++-----
 block/blk-throttle.c          |  5 +++--
 block/blk-zoned.c             |  5 +++--
 block/elevator.c              | 16 ++++++++++------
 drivers/block/aoe/aoedev.c    |  5 +++--
 drivers/block/ataflop.c       |  5 +++--
 drivers/block/loop.c          | 20 ++++++++++++--------
 drivers/block/nbd.c           |  7 ++++---
 drivers/block/rbd.c           |  5 +++--
 drivers/block/sunvdc.c        |  5 +++--
 drivers/block/swim3.c         |  5 +++--
 drivers/block/virtio_blk.c    |  5 +++--
 drivers/mtd/mtd_blkdevs.c     |  5 +++--
 drivers/nvme/host/core.c      | 17 ++++++++++-------
 drivers/nvme/host/multipath.c |  2 +-
 drivers/scsi/scsi_lib.c       |  5 +++--
 drivers/scsi/scsi_scan.c      |  5 +++--
 drivers/ufs/core/ufs-sysfs.c  |  7 +++++--
 include/linux/blk-mq.h        | 18 ++++++++++++++++--
 26 files changed, 136 insertions(+), 84 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 45a395862fbc8..c795fa3a30e1a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1545,6 +1545,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	struct request_queue *q = disk->queue;
 	struct blkg_policy_data *pd_prealloc = NULL;
 	struct blkcg_gq *blkg, *pinned_blkg = NULL;
+	unsigned int memflags;
 	int ret;
 
 	if (blkcg_policy_enabled(q, pol))
@@ -1559,7 +1560,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 		return -EINVAL;
 
 	if (queue_is_mq(q))
-		blk_mq_freeze_queue(q);
+		memflags = blk_mq_freeze_queue(q);
 retry:
 	spin_lock_irq(&q->queue_lock);
 
@@ -1623,7 +1624,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	spin_unlock_irq(&q->queue_lock);
 out:
 	if (queue_is_mq(q))
-		blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue(q, memflags);
 	if (pinned_blkg)
 		blkg_put(pinned_blkg);
 	if (pd_prealloc)
@@ -1667,12 +1668,13 @@ void blkcg_deactivate_policy(struct gendisk *disk,
 {
 	struct request_queue *q = disk->queue;
 	struct blkcg_gq *blkg;
+	unsigned int memflags;
 
 	if (!blkcg_policy_enabled(q, pol))
 		return;
 
 	if (queue_is_mq(q))
-		blk_mq_freeze_queue(q);
+		memflags = blk_mq_freeze_queue(q);
 
 	mutex_lock(&q->blkcg_mutex);
 	spin_lock_irq(&q->queue_lock);
@@ -1696,7 +1698,7 @@ void blkcg_deactivate_policy(struct gendisk *disk,
 	mutex_unlock(&q->blkcg_mutex);
 
 	if (queue_is_mq(q))
-		blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue(q, memflags);
 }
 EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
 
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a5894ec9696e7..65a1d4427ccf4 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	u32 qos[NR_QOS_PARAMS];
 	bool enable, user;
 	char *body, *p;
+	unsigned int memflags;
 	int ret;
 
 	blkg_conf_init(&ctx, input);
@@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		ioc = q_to_ioc(disk->queue);
 	}
 
-	blk_mq_freeze_queue(disk->queue);
+	memflags = blk_mq_freeze_queue(disk->queue);
 	blk_mq_quiesce_queue(disk->queue);
 
 	spin_lock_irq(&ioc->lock);
@@ -3347,7 +3348,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		wbt_enable_default(disk);
 
 	blk_mq_unquiesce_queue(disk->queue);
-	blk_mq_unfreeze_queue(disk->queue);
+	blk_mq_unfreeze_queue(disk->queue, memflags);
 
 	blkg_conf_exit(&ctx);
 	return nbytes;
@@ -3355,7 +3356,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	spin_unlock_irq(&ioc->lock);
 
 	blk_mq_unquiesce_queue(disk->queue);
-	blk_mq_unfreeze_queue(disk->queue);
+	blk_mq_unfreeze_queue(disk->queue, memflags);
 
 	ret = -EINVAL;
 err:
@@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 {
 	struct blkg_conf_ctx ctx;
 	struct request_queue *q;
+	unsigned int memflags;
 	struct ioc *ioc;
 	u64 u[NR_I_LCOEFS];
 	bool user;
@@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 		ioc = q_to_ioc(q);
 	}
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 
 	spin_lock_irq(&ioc->lock);
@@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 	spin_unlock_irq(&ioc->lock);
 
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	blkg_conf_exit(&ctx);
 	return nbytes;
@@ -3502,7 +3504,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 	spin_unlock_irq(&ioc->lock);
 
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	ret = -EINVAL;
 err:
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ebb522788d978..42c1e0b9a68f2 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
 	 */
 	enabled = atomic_read(&blkiolat->enable_cnt);
 	if (enabled != blkiolat->enabled) {
-		blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
+		unsigned int memflags;
+
+		memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
 		blkiolat->enabled = enabled;
-		blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
+		blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags);
 	}
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da39a1cac7022..40490ac880457 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
 
-void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue_nomemsave(struct request_queue *q)
 {
 	blk_freeze_queue_start(q);
 	blk_mq_freeze_queue_wait(q);
 }
-EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave);
 
 bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 {
@@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 	return unfreeze;
 }
 
-void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q)
 {
 	if (__blk_mq_unfreeze_queue(q, false))
 		blk_unfreeze_release_lock(q);
 }
-EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore);
 
 /*
  * non_owner variant of blk_freeze_queue_start
@@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
 					 bool shared)
 {
 	struct request_queue *q;
+	unsigned int memflags;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_freeze_queue(q);
+		memflags = blk_mq_freeze_queue(q);
 		queue_set_hctx_shared(q, shared);
-		blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue(q, memflags);
 	}
 }
 
@@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	struct request_queue *q;
 	LIST_HEAD(head);
 	int prev_nr_hw_queues = set->nr_hw_queues;
+	unsigned int memflags;
 	int i;
 
 	lockdep_assert_held(&set->tag_list_lock);
@@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
 		return;
 
+	memflags = memalloc_noio_save();
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue(q);
+		blk_mq_freeze_queue_nomemsave(q);
+
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
@@ -5052,7 +5056,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_elv_switch_back(&head, q);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue_nomemrestore(q);
+	memalloc_noio_restore(memflags);
 
 	/* Free the excess tags when nr_hw_queues shrink. */
 	for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
diff --git a/block/blk-pm.c b/block/blk-pm.c
index 42e8420747153..8d3e052f91da1 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 	if (percpu_ref_is_zero(&q->q_usage_counter))
 		ret = 0;
 	/* Switch q_usage_counter back to per-cpu mode. */
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue_nomemrestore(q);
 
 	if (ret < 0) {
 		spin_lock_irq(&q->queue_lock);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index eb9618cd68adf..d4d4f4dc0e23f 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -299,6 +299,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 		const struct rq_qos_ops *ops)
 {
 	struct request_queue *q = disk->queue;
+	unsigned int memflags;
 
 	lockdep_assert_held(&q->rq_qos_mutex);
 
@@ -310,14 +311,14 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
 	 * is fine since we only support rq_qos for blk-mq queue.
 	 */
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 
 	if (rq_qos_id(q, rqos->id))
 		goto ebusy;
 	rqos->next = q->rq_qos;
 	q->rq_qos = rqos;
 
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	if (rqos->ops->debugfs_attrs) {
 		mutex_lock(&q->debugfs_mutex);
@@ -327,7 +328,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 
 	return 0;
 ebusy:
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 	return -EBUSY;
 }
 
@@ -335,17 +336,18 @@ void rq_qos_del(struct rq_qos *rqos)
 {
 	struct request_queue *q = rqos->disk->queue;
 	struct rq_qos **cur;
+	unsigned int memflags;
 
 	lockdep_assert_held(&q->rq_qos_mutex);
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
 		if (*cur == rqos) {
 			*cur = rqos->next;
 			break;
 		}
 	}
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_unregister_rqos(rqos);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index db12396ff5c79..c44dadc35e1ec 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -461,11 +461,12 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update);
 int queue_limits_commit_update_frozen(struct request_queue *q,
 		struct queue_limits *lim)
 {
+	unsigned int memflags;
 	int ret;
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	ret = queue_limits_commit_update(q, lim);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	return ret;
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7b970e6765e72..6f548a4376aa4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -681,7 +681,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	struct queue_sysfs_entry *entry = to_queue(attr);
 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
 	struct request_queue *q = disk->queue;
-	unsigned int noio_flag;
+	unsigned int memflags;
 	ssize_t res;
 
 	if (!entry->store_limit && !entry->store)
@@ -711,11 +711,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	}
 
 	mutex_lock(&q->sysfs_lock);
-	blk_mq_freeze_queue(q);
-	noio_flag = memalloc_noio_save();
+	memflags = blk_mq_freeze_queue(q);
 	res = entry->store(disk, page, length);
-	memalloc_noio_restore(noio_flag);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 	mutex_unlock(&q->sysfs_lock);
 	return res;
 }
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 82dbaefcfa3bf..8d149aff9fd0b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1202,6 +1202,7 @@ static int blk_throtl_init(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
 	struct throtl_data *td;
+	unsigned int memflags;
 	int ret;
 
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
@@ -1215,7 +1216,7 @@ static int blk_throtl_init(struct gendisk *disk)
 	 * Freeze queue before activating policy, to synchronize with IO path,
 	 * which is protected by 'q_usage_counter'.
 	 */
-	blk_mq_freeze_queue(disk->queue);
+	memflags = blk_mq_freeze_queue(disk->queue);
 	blk_mq_quiesce_queue(disk->queue);
 
 	q->td = td;
@@ -1239,7 +1240,7 @@ static int blk_throtl_init(struct gendisk *disk)
 
 out:
 	blk_mq_unquiesce_queue(disk->queue);
-	blk_mq_unfreeze_queue(disk->queue);
+	blk_mq_unfreeze_queue(disk->queue, memflags);
 
 	return ret;
 }
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 9d08a54c201ee..761ea662ddc34 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -1717,9 +1717,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
 	else
 		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
 	if (ret) {
-		blk_mq_freeze_queue(q);
+		unsigned int memflags = blk_mq_freeze_queue(q);
+
 		disk_free_zone_resources(disk);
-		blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue(q, memflags);
 	}
 
 	return ret;
diff --git a/block/elevator.c b/block/elevator.c
index b81216c48b6bc..cd2ce49216010 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -570,6 +570,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q)
 void elevator_init_mq(struct request_queue *q)
 {
 	struct elevator_type *e;
+	unsigned int memflags;
 	int err;
 
 	WARN_ON_ONCE(blk_queue_registered(q));
@@ -590,13 +591,13 @@ void elevator_init_mq(struct request_queue *q)
 	 *
 	 * Disk isn't added yet, so verifying queue lock only manually.
 	 */
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 
 	blk_mq_cancel_work_sync(q);
 
 	err = blk_mq_init_sched(q, e);
 
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	if (err) {
 		pr_warn("\"%s\" elevator initialization failed, "
@@ -614,11 +615,12 @@ void elevator_init_mq(struct request_queue *q)
  */
 int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
+	unsigned int memflags;
 	int ret;
 
 	lockdep_assert_held(&q->sysfs_lock);
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 
 	if (q->elevator) {
@@ -639,7 +641,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
 out_unfreeze:
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	if (ret) {
 		pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
@@ -651,9 +653,11 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
 void elevator_disable(struct request_queue *q)
 {
+	unsigned int memflags;
+
 	lockdep_assert_held(&q->sysfs_lock);
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 
 	elv_unregister_queue(q);
@@ -664,7 +668,7 @@ void elevator_disable(struct request_queue *q)
 	blk_add_trace_msg(q, "elv switch: none");
 
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 }
 
 /*
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 3523dd82d7a00..4db7f6ce8ade0 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -226,10 +226,11 @@ aoedev_downdev(struct aoedev *d)
 	/* fast fail all pending I/O */
 	if (d->blkq) {
 		/* UP is cleared, freeze+quiesce to insure all are errored */
-		blk_mq_freeze_queue(d->blkq);
+		unsigned int memflags = blk_mq_freeze_queue(d->blkq);
+
 		blk_mq_quiesce_queue(d->blkq);
 		blk_mq_unquiesce_queue(d->blkq);
-		blk_mq_unfreeze_queue(d->blkq);
+		blk_mq_unfreeze_queue(d->blkq, memflags);
 	}
 
 	if (d->gd)
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 110f9aca2667d..a81ade622a01d 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -746,6 +746,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
 	unsigned char	*p;
 	int sect, nsect;
 	unsigned long	flags;
+	unsigned int memflags;
 	int ret;
 
 	if (type) {
@@ -758,7 +759,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
 	}
 
 	q = unit[drive].disk[type]->queue;
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 
 	local_irq_save(flags);
@@ -817,7 +818,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
 	ret = FormatError ? -EIO : 0;
 out:
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 	return ret;
 }
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d1f1d6bef2e69..c05fe27a96b64 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -586,6 +586,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 {
 	struct file *file = fget(arg);
 	struct file *old_file;
+	unsigned int memflags;
 	int error;
 	bool partscan;
 	bool is_loop;
@@ -623,14 +624,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
 	/* and ... switch */
 	disk_force_media_change(lo->lo_disk);
-	blk_mq_freeze_queue(lo->lo_queue);
+	memflags = blk_mq_freeze_queue(lo->lo_queue);
 	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
 	lo->lo_backing_file = file;
 	lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
 	mapping_set_gfp_mask(file->f_mapping,
 			     lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 	loop_update_dio(lo);
-	blk_mq_unfreeze_queue(lo->lo_queue);
+	blk_mq_unfreeze_queue(lo->lo_queue, memflags);
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
 	loop_global_unlock(lo, is_loop);
 
@@ -1255,6 +1256,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	int err;
 	bool partscan = false;
 	bool size_changed = false;
+	unsigned int memflags;
 
 	err = mutex_lock_killable(&lo->lo_mutex);
 	if (err)
@@ -1272,7 +1274,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	}
 
 	/* I/O needs to be drained before changing lo_offset or lo_sizelimit */
-	blk_mq_freeze_queue(lo->lo_queue);
+	memflags = blk_mq_freeze_queue(lo->lo_queue);
 
 	err = loop_set_status_from_info(lo, info);
 	if (err)
@@ -1294,7 +1296,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	loop_update_dio(lo);
 
 out_unfreeze:
-	blk_mq_unfreeze_queue(lo->lo_queue);
+	blk_mq_unfreeze_queue(lo->lo_queue, memflags);
 	if (partscan)
 		clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
 out_unlock:
@@ -1446,6 +1448,7 @@ static int loop_set_capacity(struct loop_device *lo)
 static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 {
 	bool use_dio = !!arg;
+	unsigned int memflags;
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
@@ -1459,18 +1462,19 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 		vfs_fsync(lo->lo_backing_file, 0);
 	}
 
-	blk_mq_freeze_queue(lo->lo_queue);
+	memflags = blk_mq_freeze_queue(lo->lo_queue);
 	if (use_dio)
 		lo->lo_flags |= LO_FLAGS_DIRECT_IO;
 	else
 		lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
-	blk_mq_unfreeze_queue(lo->lo_queue);
+	blk_mq_unfreeze_queue(lo->lo_queue, memflags);
 	return 0;
 }
 
 static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 {
 	struct queue_limits lim;
+	unsigned int memflags;
 	int err = 0;
 
 	if (lo->lo_state != Lo_bound)
@@ -1485,10 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 	lim = queue_limits_start_update(lo->lo_queue);
 	loop_update_limits(lo, &lim, arg);
 
-	blk_mq_freeze_queue(lo->lo_queue);
+	memflags = blk_mq_freeze_queue(lo->lo_queue);
 	err = queue_limits_commit_update(lo->lo_queue, &lim);
 	loop_update_dio(lo);
-	blk_mq_unfreeze_queue(lo->lo_queue);
+	blk_mq_unfreeze_queue(lo->lo_queue, memflags);
 
 	return err;
 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b63a0f29a54ab..7bdc7eb808ea9 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1234,6 +1234,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	struct socket *sock;
 	struct nbd_sock **socks;
 	struct nbd_sock *nsock;
+	unsigned int memflags;
 	int err;
 
 	/* Arg will be cast to int, check it to avoid overflow */
@@ -1247,7 +1248,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	 * We need to make sure we don't get any errant requests while we're
 	 * reallocating the ->socks array.
 	 */
-	blk_mq_freeze_queue(nbd->disk->queue);
+	memflags = blk_mq_freeze_queue(nbd->disk->queue);
 
 	if (!netlink && !nbd->task_setup &&
 	    !test_bit(NBD_RT_BOUND, &config->runtime_flags))
@@ -1288,12 +1289,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	INIT_WORK(&nsock->work, nbd_pending_cmd_work);
 	socks[config->num_connections++] = nsock;
 	atomic_inc(&config->live_connections);
-	blk_mq_unfreeze_queue(nbd->disk->queue);
+	blk_mq_unfreeze_queue(nbd->disk->queue, memflags);
 
 	return 0;
 
 put_socket:
-	blk_mq_unfreeze_queue(nbd->disk->queue);
+	blk_mq_unfreeze_queue(nbd->disk->queue, memflags);
 	sockfd_put(sock);
 	return err;
 }
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5b393e4a1ddfc..faafd7ff43d6e 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7281,9 +7281,10 @@ static ssize_t do_rbd_remove(const char *buf, size_t count)
 		 * Prevent new IO from being queued and wait for existing
 		 * IO to complete/fail.
 		 */
-		blk_mq_freeze_queue(rbd_dev->disk->queue);
+		unsigned int memflags = blk_mq_freeze_queue(rbd_dev->disk->queue);
+
 		blk_mark_disk_dead(rbd_dev->disk);
-		blk_mq_unfreeze_queue(rbd_dev->disk->queue);
+		blk_mq_unfreeze_queue(rbd_dev->disk->queue, memflags);
 	}
 
 	del_gendisk(rbd_dev->disk);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 88dcae6ec5751..05c4aee7f262a 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -1113,6 +1113,7 @@ static void vdc_requeue_inflight(struct vdc_port *port)
 static void vdc_queue_drain(struct vdc_port *port)
 {
 	struct request_queue *q = port->disk->queue;
+	unsigned int memflags;
 
 	/*
 	 * Mark the queue as draining, then freeze/quiesce to ensure
@@ -1121,12 +1122,12 @@ static void vdc_queue_drain(struct vdc_port *port)
 	port->drain = 1;
 	spin_unlock_irq(&port->vio.lock);
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 
 	spin_lock_irq(&port->vio.lock);
 	port->drain = 0;
-	blk_mq_unquiesce_queue(q);
+	blk_mq_unquiesce_queue(q, memflags);
 	blk_mq_unfreeze_queue(q);
 }
 
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 9914153b365b6..3aedcb5add61f 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -840,6 +840,7 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state,
 static void release_drive(struct floppy_state *fs)
 {
 	struct request_queue *q = disks[fs->index]->queue;
+	unsigned int memflags;
 	unsigned long flags;
 
 	swim3_dbg("%s", "-> release drive\n");
@@ -848,10 +849,10 @@ static void release_drive(struct floppy_state *fs)
 	fs->state = idle;
 	spin_unlock_irqrestore(&swim3_lock, flags);
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 }
 
 static int fd_eject(struct floppy_state *fs)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index bbaa26b523b8d..a4af39fc7ea28 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1584,11 +1584,12 @@ static int virtblk_freeze(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
 	struct request_queue *q = vblk->disk->queue;
+	unsigned int memflags;
 
 	/* Ensure no requests in virtqueues before deleting vqs. */
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue_nowait(q);
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	/* Ensure we don't receive any more interrupts */
 	virtio_reset_device(vdev);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index ee7e1d9089861..847c11542f024 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -404,6 +404,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 {
 	unsigned long flags;
+	unsigned int memflags;
 
 	lockdep_assert_held(&mtd_table_mutex);
 
@@ -420,10 +421,10 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 	spin_unlock_irqrestore(&old->queue_lock, flags);
 
 	/* freeze+quiesce queue to ensure all requests are flushed */
-	blk_mq_freeze_queue(old->rq);
+	memflags = blk_mq_freeze_queue(old->rq);
 	blk_mq_quiesce_queue(old->rq);
 	blk_mq_unquiesce_queue(old->rq);
-	blk_mq_unfreeze_queue(old->rq);
+	blk_mq_unfreeze_queue(old->rq, memflags);
 
 	/* If the device is currently open, tell trans driver to close it,
 		then put mtd device, and don't touch it again */
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 76b615d4d5b91..40046770f1bf0 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2132,15 +2132,16 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
 		struct nvme_ns_info *info)
 {
 	struct queue_limits lim;
+	unsigned int memflags;
 	int ret;
 
 	lim = queue_limits_start_update(ns->disk->queue);
 	nvme_set_ctrl_limits(ns->ctrl, &lim);
 
-	blk_mq_freeze_queue(ns->disk->queue);
+	memflags = blk_mq_freeze_queue(ns->disk->queue);
 	ret = queue_limits_commit_update(ns->disk->queue, &lim);
 	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
-	blk_mq_unfreeze_queue(ns->disk->queue);
+	blk_mq_unfreeze_queue(ns->disk->queue, memflags);
 
 	/* Hide the block-interface for these devices */
 	if (!ret)
@@ -2155,6 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	struct nvme_id_ns_nvm *nvm = NULL;
 	struct nvme_zone_info zi = {};
 	struct nvme_id_ns *id;
+	unsigned int memflags;
 	sector_t capacity;
 	unsigned lbaf;
 	int ret;
@@ -2186,7 +2188,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 
 	lim = queue_limits_start_update(ns->disk->queue);
 
-	blk_mq_freeze_queue(ns->disk->queue);
+	memflags = blk_mq_freeze_queue(ns->disk->queue);
 	ns->head->lba_shift = id->lbaf[lbaf].ds;
 	ns->head->nuse = le64_to_cpu(id->nuse);
 	capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
@@ -2219,7 +2221,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 
 	ret = queue_limits_commit_update(ns->disk->queue, &lim);
 	if (ret) {
-		blk_mq_unfreeze_queue(ns->disk->queue);
+		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
 		goto out;
 	}
 
@@ -2235,7 +2237,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		ns->head->features |= NVME_NS_DEAC;
 	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
 	set_bit(NVME_NS_READY, &ns->flags);
-	blk_mq_unfreeze_queue(ns->disk->queue);
+	blk_mq_unfreeze_queue(ns->disk->queue, memflags);
 
 	if (blk_queue_is_zoned(ns->queue)) {
 		ret = blk_revalidate_disk_zones(ns->disk);
@@ -2291,9 +2293,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
 	if (!ret && nvme_ns_head_multipath(ns->head)) {
 		struct queue_limits *ns_lim = &ns->disk->queue->limits;
 		struct queue_limits lim;
+		unsigned int memflags;
 
 		lim = queue_limits_start_update(ns->head->disk->queue);
-		blk_mq_freeze_queue(ns->head->disk->queue);
+		memflags = blk_mq_freeze_queue(ns->head->disk->queue);
 		/*
 		 * queue_limits mixes values that are the hardware limitations
 		 * for bio splitting with what is the device configuration.
@@ -2325,7 +2328,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
 		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
 		nvme_mpath_revalidate_paths(ns);
 
-		blk_mq_unfreeze_queue(ns->head->disk->queue);
+		blk_mq_unfreeze_queue(ns->head->disk->queue, memflags);
 	}
 
 	return ret;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a85d190942bdf..2a76355650830 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -60,7 +60,7 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
 	lockdep_assert_held(&subsys->lock);
 	list_for_each_entry(h, &subsys->nsheads, entry)
 		if (h->disk)
-			blk_mq_unfreeze_queue(h->disk->queue);
+			blk_mq_unfreeze_queue_nomemrestore(h->disk->queue);
 }
 
 void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4411426a78948..b86e259516a7e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2723,6 +2723,7 @@ int
 scsi_device_quiesce(struct scsi_device *sdev)
 {
 	struct request_queue *q = sdev->request_queue;
+	unsigned int memflags;
 	int err;
 
 	/*
@@ -2737,7 +2738,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
 
 	blk_set_pm_only(q);
 
-	blk_mq_freeze_queue(q);
+	memflags = blk_mq_freeze_queue(q);
 	/*
 	 * Ensure that the effect of blk_set_pm_only() will be visible
 	 * for percpu_ref_tryget() callers that occur after the queue
@@ -2745,7 +2746,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
 	 * was called. See also https://lwn.net/Articles/573497/.
 	 */
 	synchronize_rcu();
-	blk_mq_unfreeze_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
 
 	mutex_lock(&sdev->state_mutex);
 	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 042329b74c6e6..312d782139548 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -220,6 +220,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
 	int new_shift = sbitmap_calculate_shift(depth);
 	bool need_alloc = !sdev->budget_map.map;
 	bool need_free = false;
+	unsigned int memflags;
 	int ret;
 	struct sbitmap sb_backup;
 
@@ -240,7 +241,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
 	 * and here disk isn't added yet, so freezing is pretty fast
 	 */
 	if (need_free) {
-		blk_mq_freeze_queue(sdev->request_queue);
+		memflags = blk_mq_freeze_queue(sdev->request_queue);
 		sb_backup = sdev->budget_map;
 	}
 	ret = sbitmap_init_node(&sdev->budget_map,
@@ -256,7 +257,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
 		else
 			sbitmap_free(&sb_backup);
 		ret = 0;
-		blk_mq_unfreeze_queue(sdev->request_queue);
+		blk_mq_unfreeze_queue(sdev->request_queue, memflags);
 	}
 	return ret;
 }
diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index 796e37a1d859f..3438269a54405 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -1439,6 +1439,7 @@ static ssize_t max_number_of_rtt_store(struct device *dev,
 	struct ufs_hba *hba = dev_get_drvdata(dev);
 	struct ufs_dev_info *dev_info = &hba->dev_info;
 	struct scsi_device *sdev;
+	unsigned int memflags;
 	unsigned int rtt;
 	int ret;
 
@@ -1458,14 +1459,16 @@ static ssize_t max_number_of_rtt_store(struct device *dev,
 
 	ufshcd_rpm_get_sync(hba);
 
+	memflags = memalloc_noio_save();
 	shost_for_each_device(sdev, hba->host)
-		blk_mq_freeze_queue(sdev->request_queue);
+		blk_mq_freeze_queue_nomemsave(sdev->request_queue);
 
 	ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR,
 		QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &rtt);
 
 	shost_for_each_device(sdev, hba->host)
-		blk_mq_unfreeze_queue(sdev->request_queue);
+		blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
+	memalloc_noio_restore(memflags);
 
 	ufshcd_rpm_put_sync(hba);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a0a9007cc1e36..9ebb53f031cdb 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -900,8 +900,22 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 		busy_tag_iter_fn *fn, void *priv);
 void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
-void blk_mq_freeze_queue(struct request_queue *q);
-void blk_mq_unfreeze_queue(struct request_queue *q);
+void blk_mq_freeze_queue_nomemsave(struct request_queue *q);
+void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q);
+static inline unsigned int __must_check
+blk_mq_freeze_queue(struct request_queue *q)
+{
+	unsigned int memflags = memalloc_noio_save();
+
+	blk_mq_freeze_queue_nomemsave(q);
+	return memflags;
+}
+static inline void
+blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags)
+{
+	blk_mq_unfreeze_queue_nomemrestore(q);
+	memalloc_noio_restore(memflags);
+}
 void blk_freeze_queue_start(struct request_queue *q);
 void blk_mq_freeze_queue_wait(struct request_queue *q);
 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,

From 743bbd93cf29f653fae0e1416a31f03231689911 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 23 Jan 2025 16:01:16 +0100
Subject: [PATCH 293/368] ice: put Rx buffers after being done with current
 frame

Introduce a new helper ice_put_rx_mbuf() that goes through the frags
gathered for the current frame and calls ice_put_rx_buf() on them. The
current logic, which was supposed to simplify and optimize the driver by
going through a batch of all buffers processed in the current NAPI
instance, turned out to be broken for jumbo frames and for very heavy
load coming from both multi-thread iperf and an nginx/wrk pair between
server and client. The delay introduced by the approach that we are
dropping is simply too big, and we need to make the decision regarding
page recycling/releasing as quickly as we can.

While at it, address an error path of ice_add_xdp_frag() - we were
missing buffer putting from day 1 there.

As a nice side effect we get rid of the annoying and repetitive three-liner:

	xdp->data = NULL;
	rx_ring->first_desc = ntc;
	rx_ring->nr_frags = 0;

by embedding it within introduced routine.

Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling")
Reported-and-tested-by: Xu Du <xudu@redhat.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 79 ++++++++++++++---------
 1 file changed, 50 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 5d2d7736fd5f1..e173d9c989883 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1103,6 +1103,49 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
 	rx_buf->page = NULL;
 }
 
+/**
+ * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags
+ * @rx_ring: Rx ring with all the auxiliary data
+ * @xdp: XDP buffer carrying linear + frags part
+ * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
+ * @ntc: a current next_to_clean value to be stored at rx_ring
+ *
+ * Walk through gathered fragments and satisfy internal page
+ * recycle mechanism; we take here an action related to verdict
+ * returned by XDP program;
+ */
+static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+			    u32 *xdp_xmit, u32 ntc)
+{
+	u32 nr_frags = rx_ring->nr_frags + 1;
+	u32 idx = rx_ring->first_desc;
+	u32 cnt = rx_ring->count;
+	struct ice_rx_buf *buf;
+	int i;
+
+	for (i = 0; i < nr_frags; i++) {
+		buf = &rx_ring->rx_buf[idx];
+
+		if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+			*xdp_xmit |= buf->act;
+		} else if (buf->act & ICE_XDP_CONSUMED) {
+			buf->pagecnt_bias++;
+		} else if (buf->act == ICE_XDP_PASS) {
+			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+		}
+
+		ice_put_rx_buf(rx_ring, buf);
+
+		if (++idx == cnt)
+			idx = 0;
+	}
+
+	xdp->data = NULL;
+	rx_ring->first_desc = ntc;
+	rx_ring->nr_frags = 0;
+}
+
 /**
  * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: Rx descriptor ring to transact packets on
@@ -1120,7 +1163,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
 	unsigned int offset = rx_ring->rx_offset;
 	struct xdp_buff *xdp = &rx_ring->xdp;
-	u32 cached_ntc = rx_ring->first_desc;
 	struct ice_tx_ring *xdp_ring = NULL;
 	struct bpf_prog *xdp_prog = NULL;
 	u32 ntc = rx_ring->next_to_clean;
@@ -1128,7 +1170,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 	u32 xdp_xmit = 0;
 	u32 cached_ntu;
 	bool failure;
-	u32 first;
 
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	if (xdp_prog) {
@@ -1190,6 +1231,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 			xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
 			xdp_buff_clear_frags_flag(xdp);
 		} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
+			ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc);
 			break;
 		}
 		if (++ntc == cnt)
@@ -1205,9 +1247,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		total_rx_bytes += xdp_get_buff_len(xdp);
 		total_rx_pkts++;
 
-		xdp->data = NULL;
-		rx_ring->first_desc = ntc;
-		rx_ring->nr_frags = 0;
+		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
+
 		continue;
 construct_skb:
 		if (likely(ice_ring_uses_build_skb(rx_ring)))
@@ -1221,14 +1262,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 			if (unlikely(xdp_buff_has_frags(xdp)))
 				ice_set_rx_bufs_act(xdp, rx_ring,
 						    ICE_XDP_CONSUMED);
-			xdp->data = NULL;
-			rx_ring->first_desc = ntc;
-			rx_ring->nr_frags = 0;
-			break;
 		}
-		xdp->data = NULL;
-		rx_ring->first_desc = ntc;
-		rx_ring->nr_frags = 0;
+		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
+
+		if (!skb)
+			break;
 
 		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
 		if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
@@ -1257,23 +1295,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		total_rx_pkts++;
 	}
 
-	first = rx_ring->first_desc;
-	while (cached_ntc != first) {
-		struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc];
-
-		if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
-			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
-			xdp_xmit |= buf->act;
-		} else if (buf->act & ICE_XDP_CONSUMED) {
-			buf->pagecnt_bias++;
-		} else if (buf->act == ICE_XDP_PASS) {
-			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
-		}
-
-		ice_put_rx_buf(rx_ring, buf);
-		if (++cached_ntc >= cnt)
-			cached_ntc = 0;
-	}
 	rx_ring->next_to_clean = ntc;
 	/* return up to cleaned_count buffers to hardware */
 	failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));

From 11c4aa074d547d825b19cd8d9f288254d89d805c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 23 Jan 2025 16:01:17 +0100
Subject: [PATCH 294/368] ice: gather page_count()'s of each frag right before
 XDP prog call

If we store the pgcnt on a few fragments while in the middle of
gathering the whole frame and we stumble upon the DD bit not being set,
we terminate the NAPI Rx processing loop and come back later on. Then, on
the next NAPI execution, we work on the previously stored pgcnt.

Imagine that the second half of the page was actively used by the
networking stack and, by the time we came back, the stack was no longer
busy with this page and had decremented the refcnt. The page reuse
algorithm should in this case be able to reuse the page, but given the
old refcnt it will not do so and will attempt to release the page via
page_frag_cache_drain() with pagecnt_bias used as an arg. This in turn
will result in a negative refcnt on struct page, which was initially
observed by Xu Du.

Therefore, move the page count storage from ice_get_rx_buf() to a place
where we are sure that the whole frame has been collected, but before
calling the XDP program, as it can also internally change the page count
of fragments belonging to the xdp_buff.

Fixes: ac0753391195 ("ice: Store page count inside ice_rx_buf")
Reported-and-tested-by: Xu Du <xudu@redhat.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 27 ++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index e173d9c989883..cf46bcf143b4b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -924,7 +924,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
 	struct ice_rx_buf *rx_buf;
 
 	rx_buf = &rx_ring->rx_buf[ntc];
-	rx_buf->pgcnt = page_count(rx_buf->page);
 	prefetchw(rx_buf->page);
 
 	if (!size)
@@ -940,6 +939,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
 	return rx_buf;
 }
 
+/**
+ * ice_get_pgcnts - grab page_count() for gathered fragments
+ * @rx_ring: Rx descriptor ring to store the page counts on
+ *
+ * This function is intended to be called right before running XDP
+ * program so that the page recycling mechanism will be able to take
+ * a correct decision regarding underlying pages; this is done in such
+ * way as XDP program can change the refcount of page
+ */
+static void ice_get_pgcnts(struct ice_rx_ring *rx_ring)
+{
+	u32 nr_frags = rx_ring->nr_frags + 1;
+	u32 idx = rx_ring->first_desc;
+	struct ice_rx_buf *rx_buf;
+	u32 cnt = rx_ring->count;
+
+	for (int i = 0; i < nr_frags; i++) {
+		rx_buf = &rx_ring->rx_buf[idx];
+		rx_buf->pgcnt = page_count(rx_buf->page);
+
+		if (++idx == cnt)
+			idx = 0;
+	}
+}
+
 /**
  * ice_build_skb - Build skb around an existing buffer
  * @rx_ring: Rx descriptor ring to transact packets on
@@ -1241,6 +1265,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		if (ice_is_non_eop(rx_ring, rx_desc))
 			continue;
 
+		ice_get_pgcnts(rx_ring);
 		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
 		if (rx_buf->act == ICE_XDP_PASS)
 			goto construct_skb;

From 468a1952df78f65c5991b7ac885c8b5b7dd87bab Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 23 Jan 2025 16:01:18 +0100
Subject: [PATCH 295/368] ice: stop storing XDP verdict within ice_rx_buf

The idea behind having ice_rx_buf::act was to simplify and speed up the
Rx data path by walking through buffers that represented cleaned HW Rx
descriptors. Since it caused us a major headache recently and we rolled
back to the old approach that 'puts' Rx buffers right after running the
XDP prog/creating the skb, it is useless now and should be removed.

Get rid of ice_rx_buf::act and related logic. We still need to take care
of a corner case where XDP program releases a particular fragment.

Make ice_run_xdp() return its result and use it within
ice_put_rx_mbuf().

Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c     | 62 +++++++++++--------
 drivers/net/ethernet/intel/ice/ice_txrx.h     |  1 -
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 43 -------------
 3 files changed, 36 insertions(+), 70 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index cf46bcf143b4b..9c9ea4c1b93b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
  * @xdp: xdp_buff used as input to the XDP program
  * @xdp_prog: XDP program to run
  * @xdp_ring: ring to be used for XDP_TX action
- * @rx_buf: Rx buffer to store the XDP action
  * @eop_desc: Last descriptor in packet to read metadata from
  *
  * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
  */
-static void
+static u32
 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
-	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
+	    union ice_32b_rx_flex_desc *eop_desc)
 {
 	unsigned int ret = ICE_XDP_PASS;
 	u32 act;
@@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 		ret = ICE_XDP_CONSUMED;
 	}
 exit:
-	ice_set_rx_bufs_act(xdp, rx_ring, ret);
+	return ret;
 }
 
 /**
@@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 		xdp_buff_set_frags_flag(xdp);
 	}
 
-	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
-		ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS))
 		return -ENOMEM;
-	}
 
 	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
 				   rx_buf->page_offset, size);
@@ -1075,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
 				rx_buf->page_offset + headlen, size,
 				xdp->frame_sz);
 	} else {
-		/* buffer is unused, change the act that should be taken later
-		 * on; data was copied onto skb's linear part so there's no
+		/* buffer is unused, restore biased page count in Rx buffer;
+		 * data was copied onto skb's linear part so there's no
 		 * need for adjusting page offset and we can reuse this buffer
 		 * as-is
 		 */
-		rx_buf->act = ICE_SKB_CONSUMED;
+		rx_buf->pagecnt_bias++;
 	}
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
@@ -1133,29 +1130,34 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
  * @xdp: XDP buffer carrying linear + frags part
  * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
  * @ntc: a current next_to_clean value to be stored at rx_ring
+ * @verdict: return code from XDP program execution
  *
  * Walk through gathered fragments and satisfy internal page
  * recycle mechanism; we take here an action related to verdict
  * returned by XDP program;
  */
 static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
-			    u32 *xdp_xmit, u32 ntc)
+			    u32 *xdp_xmit, u32 ntc, u32 verdict)
 {
 	u32 nr_frags = rx_ring->nr_frags + 1;
 	u32 idx = rx_ring->first_desc;
 	u32 cnt = rx_ring->count;
+	u32 post_xdp_frags = 1;
 	struct ice_rx_buf *buf;
 	int i;
 
-	for (i = 0; i < nr_frags; i++) {
+	if (unlikely(xdp_buff_has_frags(xdp)))
+		post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags;
+
+	for (i = 0; i < post_xdp_frags; i++) {
 		buf = &rx_ring->rx_buf[idx];
 
-		if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+		if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) {
 			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
-			*xdp_xmit |= buf->act;
-		} else if (buf->act & ICE_XDP_CONSUMED) {
+			*xdp_xmit |= verdict;
+		} else if (verdict & ICE_XDP_CONSUMED) {
 			buf->pagecnt_bias++;
-		} else if (buf->act == ICE_XDP_PASS) {
+		} else if (verdict == ICE_XDP_PASS) {
 			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
 		}
 
@@ -1164,6 +1166,17 @@ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 		if (++idx == cnt)
 			idx = 0;
 	}
+	/* handle buffers that represented frags released by XDP prog;
+	 * for these we keep pagecnt_bias as-is; refcount from struct page
+	 * has been decremented within XDP prog and we do not have to increase
+	 * the biased refcnt
+	 */
+	for (; i < nr_frags; i++) {
+		buf = &rx_ring->rx_buf[idx];
+		ice_put_rx_buf(rx_ring, buf);
+		if (++idx == cnt)
+			idx = 0;
+	}
 
 	xdp->data = NULL;
 	rx_ring->first_desc = ntc;
@@ -1190,9 +1203,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 	struct ice_tx_ring *xdp_ring = NULL;
 	struct bpf_prog *xdp_prog = NULL;
 	u32 ntc = rx_ring->next_to_clean;
+	u32 cached_ntu, xdp_verdict;
 	u32 cnt = rx_ring->count;
 	u32 xdp_xmit = 0;
-	u32 cached_ntu;
 	bool failure;
 
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
@@ -1255,7 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 			xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
 			xdp_buff_clear_frags_flag(xdp);
 		} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
-			ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc);
+			ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED);
 			break;
 		}
 		if (++ntc == cnt)
@@ -1266,13 +1279,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 			continue;
 
 		ice_get_pgcnts(rx_ring);
-		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
-		if (rx_buf->act == ICE_XDP_PASS)
+		xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
+		if (xdp_verdict == ICE_XDP_PASS)
 			goto construct_skb;
 		total_rx_bytes += xdp_get_buff_len(xdp);
 		total_rx_pkts++;
 
-		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
+		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
 
 		continue;
 construct_skb:
@@ -1283,12 +1296,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
 			rx_ring->ring_stats->rx_stats.alloc_page_failed++;
-			rx_buf->act = ICE_XDP_CONSUMED;
-			if (unlikely(xdp_buff_has_frags(xdp)))
-				ice_set_rx_bufs_act(xdp, rx_ring,
-						    ICE_XDP_CONSUMED);
+			xdp_verdict = ICE_XDP_CONSUMED;
 		}
-		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
+		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
 
 		if (!skb)
 			break;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index cb347c852ba9e..806bce701df34 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -201,7 +201,6 @@ struct ice_rx_buf {
 	struct page *page;
 	unsigned int page_offset;
 	unsigned int pgcnt;
-	unsigned int act;
 	unsigned int pagecnt_bias;
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 79f960c6680d1..6cf32b4041275 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -5,49 +5,6 @@
 #define _ICE_TXRX_LIB_H_
 #include "ice.h"
 
-/**
- * ice_set_rx_bufs_act - propagate Rx buffer action to frags
- * @xdp: XDP buffer representing frame (linear and frags part)
- * @rx_ring: Rx ring struct
- * act: action to store onto Rx buffers related to XDP buffer parts
- *
- * Set action that should be taken before putting Rx buffer from first frag
- * to the last.
- */
-static inline void
-ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
-		    const unsigned int act)
-{
-	u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
-	u32 nr_frags = rx_ring->nr_frags + 1;
-	u32 idx = rx_ring->first_desc;
-	u32 cnt = rx_ring->count;
-	struct ice_rx_buf *buf;
-
-	for (int i = 0; i < nr_frags; i++) {
-		buf = &rx_ring->rx_buf[idx];
-		buf->act = act;
-
-		if (++idx == cnt)
-			idx = 0;
-	}
-
-	/* adjust pagecnt_bias on frags freed by XDP prog */
-	if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
-		u32 delta = rx_ring->nr_frags - sinfo_frags;
-
-		while (delta) {
-			if (idx == 0)
-				idx = cnt - 1;
-			else
-				idx--;
-			buf = &rx_ring->rx_buf[idx];
-			buf->pagecnt_bias--;
-			delta--;
-		}
-	}
-}
-
 /**
  * ice_test_staterr - tests bits in Rx descriptor status and error fields
  * @status_err_n: Rx descriptor status_error0 or status_error1 bits

From 45a99d5d117300eb84eceaa312bb3c3262f8c85b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 16 Sep 2024 20:21:52 +0200
Subject: [PATCH 296/368] cifs: Add support for creating native Windows sockets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Native Windows sockets created by WinSock on Windows 10 April 2018 Update
(version 1803), Windows Server 2019 (version 1809), or later versions are
reparse points with the IO_REPARSE_TAG_AF_UNIX tag, an empty reparse point
data buffer, and no EAs.

Create AF_UNIX sockets in this native format if -o nonativesocket was not
specified.

This change makes AF_UNIX sockets created by Linux CIFS client compatible
with AF_UNIX sockets created by Windows applications on NTFS volumes.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsfs.c     |  4 ++++
 fs/smb/client/connect.c    |  2 ++
 fs/smb/client/fs_context.c |  5 +++++
 fs/smb/client/fs_context.h |  2 ++
 fs/smb/client/reparse.c    | 32 ++++++++++++++++++++++++++++++++
 5 files changed, 45 insertions(+)

diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index f2c852c9d6a11..6a3bd652d251d 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -715,6 +715,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 					    cifs_sb->ctx->backupgid));
 	seq_show_option(s, "reparse",
 			cifs_reparse_type_str(cifs_sb->ctx->reparse_type));
+	if (cifs_sb->ctx->nonativesocket)
+		seq_puts(s, ",nonativesocket");
+	else
+		seq_puts(s, ",nativesocket");
 	seq_show_option(s, "symlink",
 			cifs_symlink_type_str(get_cifs_symlink_type(cifs_sb)));
 
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index ebd20f48f6aac..f917de020dd5d 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2849,6 +2849,8 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
 		return 0;
 	if (old->ctx->reparse_type != new->ctx->reparse_type)
 		return 0;
+	if (old->ctx->nonativesocket != new->ctx->nonativesocket)
+		return 0;
 	if (old->ctx->symlink_type != new->ctx->symlink_type)
 		return 0;
 
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 821eb149e4b88..e9b286d9a7ba3 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -133,6 +133,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
 	fsparam_flag("rootfs", Opt_rootfs),
 	fsparam_flag("compress", Opt_compress),
 	fsparam_flag("witness", Opt_witness),
+	fsparam_flag_no("nativesocket", Opt_nativesocket),
 
 	/* Mount options which take uid or gid */
 	fsparam_uid("backupuid", Opt_backupuid),
@@ -1784,6 +1785,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		if (parse_reparse_flavor(fc, param->string, ctx))
 			goto cifs_parse_mount_err;
 		break;
+	case Opt_nativesocket:
+		ctx->nonativesocket = result.negated;
+		break;
 	case Opt_symlink:
 		if (parse_symlink_flavor(fc, param->string, ctx))
 			goto cifs_parse_mount_err;
@@ -1918,6 +1922,7 @@ int smb3_init_fs_context(struct fs_context *fc)
 	ctx->retrans = 1;
 	ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT;
 	ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT;
+	ctx->nonativesocket = 0;
 
 /*
  *	short int override_uid = -1;
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 2ccdda350267f..881bfc08667e7 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -179,6 +179,7 @@ enum cifs_param {
 	Opt_cache,
 	Opt_reparse,
 	Opt_upcalltarget,
+	Opt_nativesocket,
 	Opt_symlink,
 	Opt_symlinkroot,
 
@@ -310,6 +311,7 @@ struct smb3_fs_context {
 	bool dfs_automount:1; /* set for dfs automount only */
 	enum cifs_reparse_type reparse_type;
 	enum cifs_symlink_type symlink_type;
+	bool nonativesocket:1;
 	bool dfs_conn:1; /* set for dfs mounts */
 	char *dns_dom;
 	char *symlinkroot; /* top level directory for native SMB symlinks in absolute format */
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 24a5f563df26a..34bf4e3f28d98 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -378,6 +378,35 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb,
 	return 0;
 }
 
+static int create_native_socket(const unsigned int xid, struct inode *inode,
+				struct dentry *dentry, struct cifs_tcon *tcon,
+				const char *full_path)
+{
+	struct reparse_data_buffer buf = {
+		.ReparseTag = cpu_to_le32(IO_REPARSE_TAG_AF_UNIX),
+		.ReparseDataLength = cpu_to_le16(0),
+	};
+	struct cifs_open_info_data data = {
+		.reparse_point = true,
+		.reparse = { .tag = IO_REPARSE_TAG_AF_UNIX, .buf = &buf, },
+	};
+	struct kvec iov = {
+		.iov_base = &buf,
+		.iov_len = sizeof(buf),
+	};
+	struct inode *new;
+	int rc = 0;
+
+	new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
+				     tcon, full_path, false, &iov, NULL);
+	if (!IS_ERR(new))
+		d_instantiate(dentry, new);
+	else
+		rc = PTR_ERR(new);
+	cifs_free_open_info(&data);
+	return rc;
+}
+
 static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 			       mode_t mode, dev_t dev,
 			       struct kvec *iov)
@@ -601,6 +630,9 @@ int smb2_mknod_reparse(unsigned int xid, struct inode *inode,
 {
 	struct smb3_fs_context *ctx = CIFS_SB(inode->i_sb)->ctx;
 
+	if (S_ISSOCK(mode) && !ctx->nonativesocket && ctx->reparse_type != CIFS_REPARSE_TYPE_NONE)
+		return create_native_socket(xid, inode, dentry, tcon, full_path);
+
 	switch (ctx->reparse_type) {
 	case CIFS_REPARSE_TYPE_NFS:
 		return mknod_nfs(xid, inode, dentry, tcon, full_path, mode, dev, NULL);

From 071b8a67a8b2e611e837dfa342a883183a19c190 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Fri, 13 Sep 2024 11:42:59 +0200
Subject: [PATCH 297/368] cifs: Add support for creating NFS-style symlinks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CIFS client is currently able to parse NFS-style symlinks, but is not
able to create them. This functionality is useful when the mounted SMB
share is also used by a Windows NFS server (on Windows Server 2012 or
newer). It allows interop of symlinks between an SMB share mounted by the
Linux CIFS client and the same export from a Windows NFS server mounted by
some NFS client.

New symlinks are created in NFS-style only when the mount option
-o reparse=nfs is specified, which is not the default. So default CIFS
mounts are not affected by this change.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/reparse.c | 47 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 34bf4e3f28d98..4cdaa83a215c0 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -409,6 +409,8 @@ static int create_native_socket(const unsigned int xid, struct inode *inode,
 
 static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 			       mode_t mode, dev_t dev,
+			       __le16 *symname_utf16,
+			       int symname_utf16_len,
 			       struct kvec *iov)
 {
 	u64 type;
@@ -419,13 +421,18 @@ static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 	switch ((type = reparse_mode_nfs_type(mode))) {
 	case NFS_SPECFILE_BLK:
 	case NFS_SPECFILE_CHR:
-		dlen = sizeof(__le64);
+		dlen = 2 * sizeof(__le32);
+		((__le32 *)buf->DataBuffer)[0] = cpu_to_le32(MAJOR(dev));
+		((__le32 *)buf->DataBuffer)[1] = cpu_to_le32(MINOR(dev));
+		break;
+	case NFS_SPECFILE_LNK:
+		dlen = symname_utf16_len;
+		memcpy(buf->DataBuffer, symname_utf16, symname_utf16_len);
 		break;
 	case NFS_SPECFILE_FIFO:
 	case NFS_SPECFILE_SOCK:
 		dlen = 0;
 		break;
-	case NFS_SPECFILE_LNK: /* TODO: add support for NFS symlinks */
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -435,8 +442,6 @@ static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf,
 	buf->InodeType = cpu_to_le64(type);
 	buf->ReparseDataLength = cpu_to_le16(len + dlen -
 					     sizeof(struct reparse_data_buffer));
-	*(__le64 *)buf->DataBuffer = cpu_to_le64(((u64)MINOR(dev) << 32) |
-						 MAJOR(dev));
 	iov->iov_base = buf;
 	iov->iov_len = len + dlen;
 	return 0;
@@ -447,21 +452,42 @@ static int mknod_nfs(unsigned int xid, struct inode *inode,
 		     const char *full_path, umode_t mode, dev_t dev,
 		     const char *symname)
 {
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifs_open_info_data data;
-	struct reparse_nfs_data_buffer *p;
+	struct reparse_nfs_data_buffer *p = NULL;
+	__le16 *symname_utf16 = NULL;
+	int symname_utf16_len = 0;
 	struct inode *new;
 	struct kvec iov;
 	__u8 buf[sizeof(*p) + sizeof(__le64)];
 	int rc;
 
-	p = (struct reparse_nfs_data_buffer *)buf;
-	rc = nfs_set_reparse_buf(p, mode, dev, &iov);
+	if (S_ISLNK(mode)) {
+		symname_utf16 = cifs_strndup_to_utf16(symname, strlen(symname),
+						      &symname_utf16_len,
+						      cifs_sb->local_nls,
+						      NO_MAP_UNI_RSVD);
+		if (!symname_utf16) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		symname_utf16_len -= 2; /* symlink is without trailing wide-nul */
+		p = kzalloc(sizeof(*p) + symname_utf16_len, GFP_KERNEL);
+		if (!p) {
+			rc = -ENOMEM;
+			goto out;
+		}
+	} else {
+		p = (struct reparse_nfs_data_buffer *)buf;
+	}
+	rc = nfs_set_reparse_buf(p, mode, dev, symname_utf16, symname_utf16_len, &iov);
 	if (rc)
-		return rc;
+		goto out;
 
 	data = (struct cifs_open_info_data) {
 		.reparse_point = true,
 		.reparse = { .tag = IO_REPARSE_TAG_NFS, .buf = (struct reparse_data_buffer *)p, },
+		.symlink_target = kstrdup(symname, GFP_KERNEL),
 	};
 
 	new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
@@ -471,6 +497,11 @@ static int mknod_nfs(unsigned int xid, struct inode *inode,
 	else
 		rc = PTR_ERR(new);
 	cifs_free_open_info(&data);
+out:
+	if (S_ISLNK(mode)) {
+		kfree(symname_utf16);
+		kfree(p);
+	}
 	return rc;
 }
 

From 021840c1426c012a812f8b8d9413f3cf9d3e0b9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sun, 29 Dec 2024 15:31:05 +0100
Subject: [PATCH 298/368] cifs: Fix struct FILE_ALL_INFO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct FILE_ALL_INFO for level 263 (0x107) used by QPathInfo does not have
any IndexNumber, AccessFlags, IndexNumber1, CurrentByteOffset, Mode or
AlignmentRequirement members. So remove all of them.

Also adjust code in move_cifs_info_to_smb2() function which converts struct
FILE_ALL_INFO to struct smb2_file_all_info.

The fixed content of struct FILE_ALL_INFO was verified to be correct against:
* [MS-CIFS] section 2.2.8.3.10 SMB_QUERY_FILE_ALL_INFO
* Samba server implementation of trans2 query file/path for level 263
* Packet structure tests against Windows SMB servers

This change fixes CIFSSMBQFileInfo() and CIFSSMBQPathInfo() functions which
directly copy received FILE_ALL_INFO network buffers into kernel structures
of FILE_ALL_INFO type.

struct FILE_ALL_INFO is the response structure returned by the SMB server.
So the incorrect definition of this structure can lead to returning bogus
information in stat() call.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsglob.h | 12 +++++++-----
 fs/smb/client/cifspdu.h  |  6 ------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index ee9754fad3e8a..5ba6b46fe9d1e 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -2203,11 +2203,13 @@ static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses)
 
 static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const FILE_ALL_INFO *src)
 {
-	memcpy(dst, src, (size_t)((u8 *)&src->AccessFlags - (u8 *)src));
-	dst->AccessFlags = src->AccessFlags;
-	dst->CurrentByteOffset = src->CurrentByteOffset;
-	dst->Mode = src->Mode;
-	dst->AlignmentRequirement = src->AlignmentRequirement;
+	memcpy(dst, src, (size_t)((u8 *)&src->EASize - (u8 *)src));
+	dst->IndexNumber = 0;
+	dst->EASize = src->EASize;
+	dst->AccessFlags = 0;
+	dst->CurrentByteOffset = 0;
+	dst->Mode = 0;
+	dst->AlignmentRequirement = 0;
 	dst->FileNameLength = src->FileNameLength;
 }
 
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 84743f3d7c512..48d0d6f439cf4 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -2290,13 +2290,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
 	__u8 DeletePending;
 	__u8 Directory;
 	__u16 Pad2;
-	__le64 IndexNumber;
 	__le32 EASize;
-	__le32 AccessFlags;
-	__u64 IndexNumber1;
-	__le64 CurrentByteOffset;
-	__le32 Mode;
-	__le32 AlignmentRequirement;
 	__le32 FileNameLength;
 	union {
 		char __pad;

From eea5119fa5979c350af5783a8148eacdd4219715 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 28 Jan 2025 01:04:23 -0600
Subject: [PATCH 299/368] smb3: add support for IAKerb

There are now more servers which advertise support for IAKerb (passthrough
Kerberos authentication via proxy).  IAKerb is a public extension of the
industry-standard Kerberos protocol that allows a client without
line-of-sight to a Domain Controller to authenticate. There can be cases
where we would fail to mount if the server only advertises the OID for
IAKerb in SPNEGO/GSSAPI.  Add code to allow us to still upcall to userspace
in these cases to obtain the Kerberos ticket.

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/asn1.c        | 2 ++
 fs/smb/client/cifs_spnego.c | 4 +++-
 fs/smb/client/cifsglob.h    | 4 ++++
 fs/smb/client/sess.c        | 3 ++-
 fs/smb/client/smb2pdu.c     | 2 +-
 5 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/smb/client/asn1.c b/fs/smb/client/asn1.c
index b5724ef9f182f..214a44509e7b9 100644
--- a/fs/smb/client/asn1.c
+++ b/fs/smb/client/asn1.c
@@ -52,6 +52,8 @@ int cifs_neg_token_init_mech_type(void *context, size_t hdrlen,
 		server->sec_kerberos = true;
 	else if (oid == OID_ntlmssp)
 		server->sec_ntlmssp = true;
+	else if (oid == OID_IAKerb)
+		server->sec_iakerb = true;
 	else {
 		char buf[50];
 
diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c
index 28f568b5fc277..bc1c1e9b288ad 100644
--- a/fs/smb/client/cifs_spnego.c
+++ b/fs/smb/client/cifs_spnego.c
@@ -138,11 +138,13 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo,
 
 	dp = description + strlen(description);
 
-	/* for now, only sec=krb5 and sec=mskrb5 are valid */
+	/* for now, only sec=krb5 and sec=mskrb5 and iakerb are valid */
 	if (server->sec_kerberos)
 		sprintf(dp, ";sec=krb5");
 	else if (server->sec_mskerberos)
 		sprintf(dp, ";sec=mskrb5");
+	else if (server->sec_iakerb)
+		sprintf(dp, ";sec=iakerb");
 	else {
 		cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
 		sprintf(dp, ";sec=krb5");
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 5ba6b46fe9d1e..a68434ad744ae 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -151,6 +151,7 @@ enum securityEnum {
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO, NTLMv2 hash */
 	Kerberos,		/* Kerberos via SPNEGO */
+	IAKerb,			/* Kerberos proxy */
 };
 
 enum upcall_target_enum {
@@ -781,6 +782,7 @@ struct TCP_Server_Info {
 	bool	sec_kerberosu2u;	/* supports U2U Kerberos */
 	bool	sec_kerberos;		/* supports plain Kerberos */
 	bool	sec_mskerberos;		/* supports legacy MS Kerberos */
+	bool	sec_iakerb;		/* supports pass-through auth for Kerberos (krb5 proxy) */
 	bool	large_buf;		/* is current buffer large? */
 	/* use SMBD connection instead of socket */
 	bool	rdma;
@@ -2148,6 +2150,8 @@ static inline char *get_security_type_str(enum securityEnum sectype)
 		return "Kerberos";
 	case NTLMv2:
 		return "NTLMv2";
+	case IAKerb:
+		return "IAKerb";
 	default:
 		return "Unknown";
 	}
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 91d4d409cb1dc..faa80e7d54a6e 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -1235,12 +1235,13 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
 		switch (requested) {
 		case Kerberos:
 		case RawNTLMSSP:
+		case IAKerb:
 			return requested;
 		case Unspecified:
 			if (server->sec_ntlmssp &&
 			    (global_secflags & CIFSSEC_MAY_NTLMSSP))
 				return RawNTLMSSP;
-			if ((server->sec_kerberos || server->sec_mskerberos) &&
+			if ((server->sec_kerberos || server->sec_mskerberos || server->sec_iakerb) &&
 			    (global_secflags & CIFSSEC_MAY_KRB5))
 				return Kerberos;
 			fallthrough;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 9f54596a6866c..40ad9e79437a4 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1429,7 +1429,7 @@ smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
 		if (server->sec_ntlmssp &&
 			(global_secflags & CIFSSEC_MAY_NTLMSSP))
 			return RawNTLMSSP;
-		if ((server->sec_kerberos || server->sec_mskerberos) &&
+		if ((server->sec_kerberos || server->sec_mskerberos || server->sec_iakerb) &&
 			(global_secflags & CIFSSEC_MAY_KRB5))
 			return Kerberos;
 		fallthrough;

From 4e2043be5c149cb07d806c438a8ec8657741bd31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sat, 28 Sep 2024 13:24:26 +0200
Subject: [PATCH 300/368] cifs: Add support for creating WSL-style symlinks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change implements support for creating new symlinks in WSL-style by
the Linux cifs client when the -o reparse=wsl mount option is specified.
A WSL-style symlink uses a reparse point with the tag
IO_REPARSE_TAG_LX_SYMLINK, and the symlink target location is stored in the
reparse buffer in UTF-8 encoding, prefixed by 32-bit flags. The meaning of
the flag bits is unknown, but it was observed that WSL always sets the
flags to the value 0x02000000. Do the same in the Linux cifs client.

New symlinks are created in WSL-style only when the mount option
-o reparse=wsl is specified, which is not the default. So default CIFS
mounts are not affected by this change.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/reparse.c | 65 +++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 4cdaa83a215c0..0a5a52a8a7dd1 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -505,9 +505,17 @@ static int mknod_nfs(unsigned int xid, struct inode *inode,
 	return rc;
 }
 
-static int wsl_set_reparse_buf(struct reparse_data_buffer *buf,
-			       mode_t mode, struct kvec *iov)
+static int wsl_set_reparse_buf(struct reparse_data_buffer **buf,
+			       mode_t mode, const char *symname,
+			       struct cifs_sb_info *cifs_sb,
+			       struct kvec *iov)
 {
+	struct reparse_wsl_symlink_data_buffer *symlink_buf;
+	__le16 *symname_utf16;
+	int symname_utf16_len;
+	int symname_utf8_maxlen;
+	int symname_utf8_len;
+	size_t buf_len;
 	u32 tag;
 
 	switch ((tag = reparse_mode_wsl_tag(mode))) {
@@ -515,17 +523,45 @@ static int wsl_set_reparse_buf(struct reparse_data_buffer *buf,
 	case IO_REPARSE_TAG_LX_CHR:
 	case IO_REPARSE_TAG_LX_FIFO:
 	case IO_REPARSE_TAG_AF_UNIX:
+		buf_len = sizeof(struct reparse_data_buffer);
+		*buf = kzalloc(buf_len, GFP_KERNEL);
+		if (!*buf)
+			return -ENOMEM;
+		break;
+	case IO_REPARSE_TAG_LX_SYMLINK:
+		symname_utf16 = cifs_strndup_to_utf16(symname, strlen(symname),
+						      &symname_utf16_len,
+						      cifs_sb->local_nls,
+						      NO_MAP_UNI_RSVD);
+		if (!symname_utf16)
+			return -ENOMEM;
+		symname_utf8_maxlen = symname_utf16_len/2*3;
+		symlink_buf = kzalloc(sizeof(struct reparse_wsl_symlink_data_buffer) +
+				      symname_utf8_maxlen, GFP_KERNEL);
+		if (!symlink_buf) {
+			kfree(symname_utf16);
+			return -ENOMEM;
+		}
+		/* Flag 0x02000000 is unknown, but all wsl symlinks have this value */
+		symlink_buf->Flags = cpu_to_le32(0x02000000);
+		/* PathBuffer is in UTF-8 but without trailing null-term byte */
+		symname_utf8_len = utf16s_to_utf8s((wchar_t *)symname_utf16, symname_utf16_len/2,
+						   UTF16_LITTLE_ENDIAN,
+						   symlink_buf->PathBuffer,
+						   symname_utf8_maxlen);
+		*buf = (struct reparse_data_buffer *)symlink_buf;
+		buf_len = sizeof(struct reparse_wsl_symlink_data_buffer) + symname_utf8_len;
+		kfree(symname_utf16);
 		break;
-	case IO_REPARSE_TAG_LX_SYMLINK: /* TODO: add support for WSL symlinks */
 	default:
 		return -EOPNOTSUPP;
 	}
 
-	buf->ReparseTag = cpu_to_le32(tag);
-	buf->Reserved = 0;
-	buf->ReparseDataLength = 0;
-	iov->iov_base = buf;
-	iov->iov_len = sizeof(*buf);
+	(*buf)->ReparseTag = cpu_to_le32(tag);
+	(*buf)->Reserved = 0;
+	(*buf)->ReparseDataLength = cpu_to_le16(buf_len - sizeof(struct reparse_data_buffer));
+	iov->iov_base = *buf;
+	iov->iov_len = buf_len;
 	return 0;
 }
 
@@ -617,25 +653,29 @@ static int mknod_wsl(unsigned int xid, struct inode *inode,
 		     const char *full_path, umode_t mode, dev_t dev,
 		     const char *symname)
 {
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifs_open_info_data data;
-	struct reparse_data_buffer buf;
+	struct reparse_data_buffer *buf;
 	struct smb2_create_ea_ctx *cc;
 	struct inode *new;
 	unsigned int len;
 	struct kvec reparse_iov, xattr_iov;
 	int rc;
 
-	rc = wsl_set_reparse_buf(&buf, mode, &reparse_iov);
+	rc = wsl_set_reparse_buf(&buf, mode, symname, cifs_sb, &reparse_iov);
 	if (rc)
 		return rc;
 
 	rc = wsl_set_xattrs(inode, mode, dev, &xattr_iov);
-	if (rc)
+	if (rc) {
+		kfree(buf);
 		return rc;
+	}
 
 	data = (struct cifs_open_info_data) {
 		.reparse_point = true,
-		.reparse = { .tag = le32_to_cpu(buf.ReparseTag), .buf = &buf, },
+		.reparse = { .tag = le32_to_cpu(buf->ReparseTag), .buf = buf, },
+		.symlink_target = kstrdup(symname, GFP_KERNEL),
 	};
 
 	cc = xattr_iov.iov_base;
@@ -652,6 +692,7 @@ static int mknod_wsl(unsigned int xid, struct inode *inode,
 		rc = PTR_ERR(new);
 	cifs_free_open_info(&data);
 	kfree(xattr_iov.iov_base);
+	kfree(buf);
 	return rc;
 }
 

From 2008d8c7121a9eee0ef8ea121581269886535150 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 27 Jan 2025 17:45:57 -0600
Subject: [PATCH 301/368] cifs: update internal version number

To 2.53

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index a762dbbbd959b..831fee962c4d6 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -146,6 +146,6 @@ extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
 /* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 52
-#define CIFS_VERSION   "2.52"
+#define SMB3_PRODUCT_BUILD 53
+#define CIFS_VERSION   "2.53"
 #endif				/* _CIFSFS_H */

From a49da4ef4b94345554923cdba1127a2d2a73d1e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 23 Sep 2024 22:29:30 +0200
Subject: [PATCH 302/368] cifs: Fix parsing native symlinks directory/file type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the SMB protocol distinguishes between a symlink to a directory and a
symlink to a file, add a mechanism to disallow resolving incompatible types.

When an SMB symlink is of the directory type, ensure that its target path
ends with a slash. This forces Linux to not allow resolving such a symlink
to a file.

And when an SMB symlink is of the file type and its target path ends with a
slash, return an error, as such a symlink is unresolvable. Such a symlink
always points to an invalid location, as a file name cannot end with a slash.

As a POSIX server does not distinguish between symlinks to files and
symlinks to directories, do not apply this change to symlinks from a POSIX
SMB server. For POSIX SMB servers, this change does nothing.

This mimics Windows behavior of native SMB symlinks.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/inode.c     |  5 ++++
 fs/smb/client/smb2file.c  | 51 +++++++++++++++++++++++++++++++++++++++
 fs/smb/client/smb2inode.c |  5 ++++
 fs/smb/client/smb2proto.h |  1 +
 4 files changed, 62 insertions(+)

diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 8896c88320c8d..9cc31cf6ebd07 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1216,6 +1216,11 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
 							      full_path,
 							      iov, data);
 		}
+
+		if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) {
+			bool directory = le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY;
+			rc = smb2_fix_symlink_target_type(&data->symlink_target, directory, cifs_sb);
+		}
 		break;
 	}
 
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index c5e689b2fc497..d609a20fb98a9 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -63,6 +63,52 @@ static struct smb2_symlink_err_rsp *symlink_data(const struct kvec *iov)
 	return sym;
 }
 
+int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_info *cifs_sb)
+{
+	char *buf;
+	int len;
+
+	/*
+	 * POSIX server does not distinguish between symlinks to file and
+	 * symlink directory. So nothing is needed to fix on the client side.
+	 */
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+		return 0;
+
+	if (!*target)
+		return -EIO;
+
+	len = strlen(*target);
+	if (!len)
+		return -EIO;
+
+	/*
+	 * If this is directory symlink and it does not have trailing slash then
+	 * append it. Trailing slash simulates Windows/SMB behavior which do not
+	 * allow resolving directory symlink to file.
+	 */
+	if (directory && (*target)[len-1] != '/') {
+		buf = krealloc(*target, len+2, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+		buf[len] = '/';
+		buf[len+1] = '\0';
+		*target = buf;
+		len++;
+	}
+
+	/*
+	 * If this is a file (non-directory) symlink and it points to path name
+	 * with trailing slash then this is an invalid symlink because file name
+	 * cannot contain slash character. File name with slash is invalid on
+	 * both Windows and Linux systems. So return an error for such symlink.
+	 */
+	if (!directory && (*target)[len-1] == '/')
+		return -EIO;
+
+	return 0;
+}
+
 int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov,
 				const char *full_path, char **path)
 {
@@ -132,6 +178,11 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32
 					       NULL, NULL, NULL);
 				oparms->create_options &= ~OPEN_REPARSE_POINT;
 			}
+			if (!rc) {
+				bool directory = le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY;
+				rc = smb2_fix_symlink_target_type(&data->symlink_target,
+								  directory, oparms->cifs_sb);
+			}
 		}
 	}
 
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index c97f14757c27c..5dfb30b0a852c 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -1010,6 +1010,11 @@ int smb2_query_path_info(const unsigned int xid,
 			else
 				rc = -EOPNOTSUPP;
 		}
+
+		if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) {
+			bool directory = le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY;
+			rc = smb2_fix_symlink_target_type(&data->symlink_target, directory, cifs_sb);
+		}
 		break;
 	case -EREMOTE:
 		break;
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 10f5e37d15309..2336dfb23f363 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -111,6 +111,7 @@ extern int smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 			  struct cifs_sb_info *cifs_sb,
 			  const unsigned char *path, char *pbuf,
 			  unsigned int *pbytes_read);
+int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_info *cifs_sb);
 int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
 			      bool relative,
 			      const char *full_path,

From 71d815bf5dfd4f63f7557e0abe7f257c202863a1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 13 Jan 2025 16:53:07 +0100
Subject: [PATCH 303/368] kbuild: Strip runtime const RELA sections correctly

Due to the fact that runtime const ELF sections are named without a
leading period or double underscore, the RSTRIP logic that removes the
static RELA sections from vmlinux fails to identify them. This results
in a situation like below, where some sections that were supposed to get
removed are left behind.

  [Nr] Name                              Type            Address          Off     Size   ES Flg Lk Inf Al

  [58] runtime_shift_d_hash_shift        PROGBITS        ffffffff83500f50 2900f50 000014 00   A  0   0  1
  [59] .relaruntime_shift_d_hash_shift   RELA            0000000000000000 55b6f00 000078 18   I 70  58  8
  [60] runtime_ptr_dentry_hashtable      PROGBITS        ffffffff83500f68 2900f68 000014 00   A  0   0  1
  [61] .relaruntime_ptr_dentry_hashtable RELA            0000000000000000 55b6f78 000078 18   I 70  60  8
  [62] runtime_ptr_USER_PTR_MAX          PROGBITS        ffffffff83500f80 2900f80 000238 00   A  0   0  1
  [63] .relaruntime_ptr_USER_PTR_MAX     RELA            0000000000000000 55b6ff0 000d50 18   I 70  62  8

So tweak the match expression to strip all sections starting with .rel.
While at it, consolidate the logic used by RISC-V, s390 and x86 into a
single shared Makefile library command.

Link: https://lore.kernel.org/all/CAHk-=wjk3ynjomNvFN8jf9A1k=qSc=JFF591W00uXj-qqNUxPQ@mail.gmail.com/
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 arch/riscv/Makefile.postlink | 8 ++------
 arch/s390/Makefile.postlink  | 6 +-----
 arch/x86/Makefile.postlink   | 6 +-----
 scripts/Makefile.lib         | 3 +++
 4 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink
index 829b9abc91f61..6b0580949b6a2 100644
--- a/arch/riscv/Makefile.postlink
+++ b/arch/riscv/Makefile.postlink
@@ -10,6 +10,7 @@ __archpost:
 
 -include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
+include $(srctree)/scripts/Makefile.lib
 
 quiet_cmd_relocs_check = CHKREL  $@
 cmd_relocs_check = 							\
@@ -19,11 +20,6 @@ ifdef CONFIG_RELOCATABLE
 quiet_cmd_cp_vmlinux_relocs = CPREL   vmlinux.relocs
 cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs
 
-quiet_cmd_relocs_strip = STRIPREL $@
-cmd_relocs_strip = $(OBJCOPY)   --remove-section='.rel.*'       \
-                                --remove-section='.rel__*'      \
-                                --remove-section='.rela.*'      \
-                                --remove-section='.rela__*' $@
 endif
 
 # `@true` prevents complaint when there is nothing to be done
@@ -33,7 +29,7 @@ vmlinux: FORCE
 ifdef CONFIG_RELOCATABLE
 	$(call if_changed,relocs_check)
 	$(call if_changed,cp_vmlinux_relocs)
-	$(call if_changed,relocs_strip)
+	$(call if_changed,strip_relocs)
 endif
 
 clean:
diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink
index df82f54107693..1ae5478cd6aca 100644
--- a/arch/s390/Makefile.postlink
+++ b/arch/s390/Makefile.postlink
@@ -11,6 +11,7 @@ __archpost:
 
 -include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
+include $(srctree)/scripts/Makefile.lib
 
 CMD_RELOCS=arch/s390/tools/relocs
 OUT_RELOCS = arch/s390/boot
@@ -19,11 +20,6 @@ quiet_cmd_relocs = RELOCS  $(OUT_RELOCS)/relocs.S
 	mkdir -p $(OUT_RELOCS); \
 	$(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S
 
-quiet_cmd_strip_relocs = RSTRIP  $@
-      cmd_strip_relocs = \
-	$(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \
-		   --remove-section='.rela.*' --remove-section='.rela__*' $@
-
 vmlinux: FORCE
 	$(call cmd,relocs)
 	$(call cmd,strip_relocs)
diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink
index fef2e977cc7dc..8b8a68162c940 100644
--- a/arch/x86/Makefile.postlink
+++ b/arch/x86/Makefile.postlink
@@ -11,6 +11,7 @@ __archpost:
 
 -include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
+include $(srctree)/scripts/Makefile.lib
 
 CMD_RELOCS = arch/x86/tools/relocs
 OUT_RELOCS = arch/x86/boot/compressed
@@ -20,11 +21,6 @@ quiet_cmd_relocs = RELOCS  $(OUT_RELOCS)/$@.relocs
 	$(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \
 	$(CMD_RELOCS) --abs-relocs $@
 
-quiet_cmd_strip_relocs = RSTRIP  $@
-      cmd_strip_relocs = \
-	$(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \
-		   --remove-section='.rela.*' --remove-section='.rela__*' $@
-
 # `@true` prevents complaint when there is nothing to be done
 
 vmlinux: FORCE
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 7395200538da8..f604f51d23cac 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -374,6 +374,9 @@ quiet_cmd_ar = AR      $@
 quiet_cmd_objcopy = OBJCOPY $@
 cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@
 
+quiet_cmd_strip_relocs = RSTRIP  $@
+cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $@
+
 # Gzip
 # ---------------------------------------------------------------------------
 

From 695ed93bb30e03e9f826ee70abdd83f970741a37 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 31 Jan 2025 23:04:01 +0900
Subject: [PATCH 304/368] kbuild: fix Clang LTO with CONFIG_OBJTOOL=n

Since commit bede169618c6 ("kbuild: enable objtool for *.mod.o and
additional kernel objects"), Clang LTO builds do not perform any
optimizations when CONFIG_OBJTOOL is disabled (e.g., for ARCH=arm64).
This is because every LLVM bitcode file is immediately converted to
ELF format before the object files are linked together.

This commit fixes the breakage.

Fixes: bede169618c6 ("kbuild: enable objtool for *.mod.o and additional kernel objects")
Reported-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
---
 scripts/Makefile.build |  2 ++
 scripts/Makefile.lib   | 10 ++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 81d9dacad03c7..993708d118745 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -194,7 +194,9 @@ endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
 
 is-standard-object = $(if $(filter-out y%, $(OBJECT_FILES_NON_STANDARD_$(target-stem).o)$(OBJECT_FILES_NON_STANDARD)n),$(is-kernel-object))
 
+ifdef CONFIG_OBJTOOL
 $(obj)/%.o: private objtool-enabled = $(if $(is-standard-object),$(if $(delay-objtool),$(is-single-obj-m),y))
+endif
 
 ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),)
 cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi)))
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index f604f51d23cac..ad55ef201aacb 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -287,6 +287,8 @@ delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT))
 cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@)
 cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd)
 
+objtool-enabled := y
+
 endif # CONFIG_OBJTOOL
 
 # Useful for describing the dependency of composite objects
@@ -302,11 +304,11 @@ endef
 # ===========================================================================
 # These are shared by some Makefile.* files.
 
-objtool-enabled := y
-
 ifdef CONFIG_LTO_CLANG
-# objtool cannot process LLVM IR. Make $(LD) covert LLVM IR to ELF here.
-cmd_ld_single = $(if $(objtool-enabled), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@)
+# Run $(LD) here to convert LLVM IR to ELF in the following cases:
+#  - when this object needs objtool processing, as objtool cannot process LLVM IR
+#  - when this is a single-object module, as modpost cannot process LLVM IR
+cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@)
 endif
 
 quiet_cmd_cc_o_c = CC $(quiet_modtag)  $@

From 8004d635f27bbccaa5c083c50d4d5302a6ffa00e Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Date: Tue, 14 Jan 2025 17:00:45 -0300
Subject: [PATCH 305/368] Revert "media: uvcvideo: Require entities to have a
 non-zero unique ID"

This reverts commit 3dd075fe8ebbc6fcbf998f81a75b8c4b159a6195.

Tomasz has reported that his device, Generalplus Technology Inc. 808 Camera,
with ID 1b3f:2002, stopped being detected:

$ ls -l /dev/video*
zsh: no matches found: /dev/video*
[    7.230599] usb 3-2: Found multiple Units with ID 5

This particular device is non-compliant, having both the Output Terminal
and Processing Unit with ID 5. uvc_scan_fallback, though, is able to build
a chain. However, when media elements are added and uvc_mc_create_links
call uvc_entity_by_id, it will get the incorrect entity,
media_create_pad_link will WARN, and it will fail to register the entities.

In order to reinstate support for such devices in a timely fashion,
reverting the fix for these warnings is appropriate. A proper fix that
considers the existence of such non-compliant devices will be submitted in
a later development cycle.

Reported-by: Tomasz Sikora <sikora.tomus@gmail.com>
Fixes: 3dd075fe8ebb ("media: uvcvideo: Require entities to have a non-zero unique ID")
Cc: stable@vger.kernel.org
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ricardo Ribalda <ribalda@chromium.org>
Link: https://lore.kernel.org/r/20250114200045.1401644-1-cascardo@igalia.com
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/uvc/uvc_driver.c | 70 ++++++++++++------------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index a10d4f4d9f95f..deadbcea5e227 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -790,27 +790,14 @@ static const u8 uvc_media_transport_input_guid[16] =
 	UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT;
 static const u8 uvc_processing_guid[16] = UVC_GUID_UVC_PROCESSING;
 
-static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type,
-					       u16 id, unsigned int num_pads,
-					       unsigned int extra_size)
+static struct uvc_entity *uvc_alloc_entity(u16 type, u16 id,
+		unsigned int num_pads, unsigned int extra_size)
 {
 	struct uvc_entity *entity;
 	unsigned int num_inputs;
 	unsigned int size;
 	unsigned int i;
 
-	/* Per UVC 1.1+ spec 3.7.2, the ID should be non-zero. */
-	if (id == 0) {
-		dev_err(&dev->udev->dev, "Found Unit with invalid ID 0.\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* Per UVC 1.1+ spec 3.7.2, the ID is unique. */
-	if (uvc_entity_by_id(dev, id)) {
-		dev_err(&dev->udev->dev, "Found multiple Units with ID %u\n", id);
-		return ERR_PTR(-EINVAL);
-	}
-
 	extra_size = roundup(extra_size, sizeof(*entity->pads));
 	if (num_pads)
 		num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1;
@@ -820,7 +807,7 @@ static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type,
 	     + num_inputs;
 	entity = kzalloc(size, GFP_KERNEL);
 	if (entity == NULL)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
 	entity->id = id;
 	entity->type = type;
@@ -932,10 +919,10 @@ static int uvc_parse_vendor_control(struct uvc_device *dev,
 			break;
 		}
 
-		unit = uvc_alloc_new_entity(dev, UVC_VC_EXTENSION_UNIT,
-					    buffer[3], p + 1, 2 * n);
-		if (IS_ERR(unit))
-			return PTR_ERR(unit);
+		unit = uvc_alloc_entity(UVC_VC_EXTENSION_UNIT, buffer[3],
+					p + 1, 2*n);
+		if (unit == NULL)
+			return -ENOMEM;
 
 		memcpy(unit->guid, &buffer[4], 16);
 		unit->extension.bNumControls = buffer[20];
@@ -1044,10 +1031,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
 			return -EINVAL;
 		}
 
-		term = uvc_alloc_new_entity(dev, type | UVC_TERM_INPUT,
-					    buffer[3], 1, n + p);
-		if (IS_ERR(term))
-			return PTR_ERR(term);
+		term = uvc_alloc_entity(type | UVC_TERM_INPUT, buffer[3],
+					1, n + p);
+		if (term == NULL)
+			return -ENOMEM;
 
 		if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) {
 			term->camera.bControlSize = n;
@@ -1103,10 +1090,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
 			return 0;
 		}
 
-		term = uvc_alloc_new_entity(dev, type | UVC_TERM_OUTPUT,
-					    buffer[3], 1, 0);
-		if (IS_ERR(term))
-			return PTR_ERR(term);
+		term = uvc_alloc_entity(type | UVC_TERM_OUTPUT, buffer[3],
+					1, 0);
+		if (term == NULL)
+			return -ENOMEM;
 
 		memcpy(term->baSourceID, &buffer[7], 1);
 
@@ -1125,10 +1112,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
 			return -EINVAL;
 		}
 
-		unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3],
-					    p + 1, 0);
-		if (IS_ERR(unit))
-			return PTR_ERR(unit);
+		unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, 0);
+		if (unit == NULL)
+			return -ENOMEM;
 
 		memcpy(unit->baSourceID, &buffer[5], p);
 
@@ -1148,9 +1134,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
 			return -EINVAL;
 		}
 
-		unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], 2, n);
-		if (IS_ERR(unit))
-			return PTR_ERR(unit);
+		unit = uvc_alloc_entity(buffer[2], buffer[3], 2, n);
+		if (unit == NULL)
+			return -ENOMEM;
 
 		memcpy(unit->baSourceID, &buffer[4], 1);
 		unit->processing.wMaxMultiplier =
@@ -1177,10 +1163,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
 			return -EINVAL;
 		}
 
-		unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3],
-					    p + 1, n);
-		if (IS_ERR(unit))
-			return PTR_ERR(unit);
+		unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, n);
+		if (unit == NULL)
+			return -ENOMEM;
 
 		memcpy(unit->guid, &buffer[4], 16);
 		unit->extension.bNumControls = buffer[20];
@@ -1320,10 +1305,9 @@ static int uvc_gpio_parse(struct uvc_device *dev)
 		return dev_err_probe(&dev->intf->dev, irq,
 				     "No IRQ for privacy GPIO\n");
 
-	unit = uvc_alloc_new_entity(dev, UVC_EXT_GPIO_UNIT,
-				    UVC_EXT_GPIO_UNIT_ID, 0, 1);
-	if (IS_ERR(unit))
-		return PTR_ERR(unit);
+	unit = uvc_alloc_entity(UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1);
+	if (!unit)
+		return -ENOMEM;
 
 	unit->gpio.gpio_privacy = gpio_privacy;
 	unit->gpio.irq = irq;

From 04a3389b35357e9bf44533d20a80eb70d188adb8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 31 Jan 2025 19:49:17 -0800
Subject: [PATCH 306/368] Remove stale generated 'genheaders' file

This bogus stale file was added in commit 101971298be2 ("riscv: add a
warning when physical memory address overflows").  It's the old location
for what is now 'security/selinux/genheaders'.

It looks like it got incorrectly committed back when that file was in
the old location, and then rebasing kept the bogus file alive.

Reported-by: Eric Biggers <ebiggers@kernel.org>
Link: https://lore.kernel.org/linux-riscv/20250201020003.GA77370@sol.localdomain/
Fixes: 101971298be2 ("riscv: add a warning when physical memory address overflows")
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/selinux/genheaders/genheaders | Bin 90112 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100755 scripts/selinux/genheaders/genheaders

diff --git a/scripts/selinux/genheaders/genheaders b/scripts/selinux/genheaders/genheaders
deleted file mode 100755
index 3fc32a664a7930b12a38d02449aec78d49690dfe..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 90112
zcmeI54VYWidFQXqM}!HsAV3BA;1X~VGz`MnRD8IRv5n=#7zLYDVcSUZjK)$tBMC`k
z%cLnHP^yXRL`~8@lP2BHlcn8hyWI-frUOqCluw3sHxX$*)FdTJmIS#ZsnShSku1!<
z=ic`(>;7Xu*(8vziNu57dw%zvbI*P6@7#MeGt#|t>y8^&u2^B=b&~ZfmMPUg;gX21
zh{TW9iCAIl3@c(?V7<aRRo0&@{}1Y+zQ*=ScLC9-{3MB{UBE0HBfiYV79zH8qG@-$
zSK|S94Wi|D%ck8aX0d7hkyq3CcMw<Tzz~PqIooS#eTua;E=L@0XL8ee!d=htFZa28
zNh8{sbeVRYo7_GAp{R-IXhc5E7|s7-%_ql*tTV5O^RH!byNb5sXls2$Cl|tYeXTt4
zlWZ@h?c$HS9dlf-+e5_mUMKBLUjAQSdf2U7bbFkCltY?-L`|!8#Z-3B)$6aAnz(!_
zo13XzUI|})`PJ*kO8K#M&JfJFLh`?HYTM3Rt(8@)X_%%_=FkeKjQ&<?S*kcMQ`d~q
ztT*k=%e2+$;>5G0Y}Pybz2YuevQ`@Q68ZnJ^e3mU`L{!u9%h~AW!jm#{CcH;WAUBG
z;qN*Q|DteP{^!@70*=-HHOJxi9*6(Ca9jT8*Es@?)&Et;;g=kT|ITrE<T(6PNvJLB
z`1L9Q#~RQ6<M6%0%`xCtQoxzk`BpV18<H31r<F*Imk$=wiE@6XP)HXOQ~7aWg<>{W
zo=l8q_F0p~blOT+vSq7OE>6r8tjT;Koil|g35mqyP~AjAv>s2C%Bf;GF`deqY7^;V
z(QKuXN=#;Rsj2K8Y13F}zGzL3PvuK#YqBs?9=9eB6tm^D)RlF)yctQVoXzJf)2Epk
zS6kb5Y`<Z1V%^wPW9yGDA6;BGw&5tY65Ds*l#s2b_hw7wbaD4ho2T-*^zPK2DYK2e
z)A^hZG~re~W<@h&PLea<NitJTa{gN(bJATpWx&kZldbzCuQ)H1&&!@>X77Jj_Se7r
z`!dZ=wjR>*%=u>hN!G`;{E}?;4707rq-<U|f8aP39&vg52{(d=ly`}rrycc(2U>3X
zK5<*?`@ZJw|Mbtf<pA-^w7yOJ3gtoK-!KzeULoSwYW*<ri1G;Wy<c+sStPzq>o<vG
zf0u~gto2*OZ&ThTo>kr<UQym9ey{Q#@pmfk6Mw&Q>+A0R;rRZgav$-Bl>3Q)M0tSt
zCzad8A6FhE{x8Z~#Gh8)Ccfg+?zlU|ga7FAF7Z>fevkNS<$dDkDz~2W=I0g4eZ+rJ
zxu5v9&$#Ua#4p$SHu0;K2Z_I0d5HLHl!uAmqC7(UHsw*`DdjQZ8Rc=}Ips;>`;}*i
z|EBT+@wX|j5dS^pRpRecUL*b|%Im}%%IAoGNclYRN0l!Se@uCU_!pHg5`RK@llZrl
zFA-l>-XVS(d?tRD@*eRE!Mk2>|3N+M6TeF9TSvV0uUGCP9#!rqzDs$4_#Wjp@!OSK
z|Ki>5Ta^2V*OdE-zg>BN_<NPx#Q$7*koW`2L&VS2;}RzRpw^ENZz_)xe?)nV_@|V|
ziGNvnl6X&fhWN9}3&eeT+$+TW%B#fBQ(hx}k@7n6G39f_!^-E0Z&toQe5djT@woCu
z;(L@giSJXsM7*rLMf@)1ZQ^fJ-XZ=T<z3=`q`XJ`kCpd{->=+y%Ip8b%KgM2Q63<E
z@Uw1voA{@-evtSVl!u7pd>$tLRjnT({w?KE;(g^Y;wyEWapI>dPZB>H`}GZPesF&n
zB!0fu4-wz2^CC=q&*$BKBh2-FBT5|ki4nK4U&PPWJS2%DpBduFXMuQ7`>zncLV1<=
zRmy9`Hz=<YzgGDi@lDF-iEmfFK>T&e8^qtBe3AI1@+R?p%9n`4hZgaY)^8KPQ+bDY
zO?j92obn#=`;_;I|Bdp{H@$v#l!uAK&j@k&86^%sW6aghIC1!yBo03_#NlUwIQ*;-
zho4pA@Uuo7e%6V@&pG1obDlW-Tp$iV8^qz~B60ZHBo059h{MkoaroIL4nI4@;b)gP
z{Ol2jpMB!+)B2Xzx2F2$BmSszKk?5h4-mgk&yzOs$F+Wh_!G*b#Gh0i`&aKcpHdzt
zj(L$Jj(L$Gj(JgFuJfWo9P^?|9P^?^9P^@19P?t1IOfGXam<Sa;+Pi=;+PkU#4#_L
z#4#_Hh+|%~h+|%~iDO=Lh+|%KiDO>$h+|&#iDO<^J+BX#7e3;c7k=WH7Xjj!7fIsZ
zQQtDe;ah<?e5(+LZ&l{%Ta7q;s}qNBbHw4>JaPE8Kpeg`h{Lx<;_$6W9KJ0Phi@(7
z@U2Z8zIBMhw=Qw`)*}wz`o!Uz^=+>|@XbdYzWIs6w*YbYW)p{RLE`YOK>Qi?twJ2W
zRf)s58gckmXa1PG|8vCQ+dOgjwm=-dHHgEvMdI+SNgTc{5r=Or;_$6a9KLmk!?!MR
z_|_v1-}=PioAqyAf8d*sIDGRHhi?Jm@XaO;--5*9TZlM(3loQLRpO`W&&g}V;ai<J
ze48T<-{zS==H_dGIDBgmhi{9-;aih9d|M(8-&(}sTbnq1>kx-;UE=VqM;yNOiNiPR
ze|!CbZ$9Gi%}*S@1&G5pn>c(65{GXg;_xj@9KJ<}!?!4L_*N(GSKsD{!?$_j@NI!O
zd}}aQ-xi6(w<dA;wnQAhwTQ#FHgWjYAr9ZV#Nk_yIDG39hi}$*y#By9A948RCl22N
z#NnGw9KHpK!?zG|_!cG(-y+1}Ta-9_ixG!!apLf8o_I-rf44v!zBP!$w?*Rct;t+{
zTOtnMTEyX7n>c*y5QlGF;_$6U9KQ95!#C@@UVpMWAAQ72%KgO8R~{gKp>mseP<fE}
z70N@zuT~x={%YkB;!)*M;x{XA5PyU6MdI+GNgO^b5r+>g=ITS6IDF_3hYwxi@S#T>
zKJ<yh2kU#@;|m{r#NmUVID7~YhYvP!_z)xxA40_8Lzp;xh!BSlQR46+MjSrGiNl8^
z@rlp5`N|N_DQ^;gv+^b4@TWx_{<Mk1pAK{Nr%N3E^oYZsK5_VCnLikk7k)1fe|*H@
zkDoaF2@r=rHu3D|-0cO4!=Dgw_!B06q4pCY4u7J=;ZKY>{D~8XKS|>7Cqo?m6o|v0
z7V&#^{B7d!q(dB@bcw^09&`1iPaK|DefK!vxWf}4ad_e<4o?Ea;fYNgo&<@*lMr!u
z5+)8$BE;cIlsG(z5r-#n;_xI%9G+x|!;=DWcv2w_PpZV>NryQ8E~rZ!+wBp@c6(2I
zk1zfnsZab}>Y??0Z~gaz|G>-NuiQr*-uj8d+W>KRYcp4GgT&!&h&a3r6Nk4E;_x;~
z9Nxx=!`nD<c$*{+Z!^T<ZGkwvtq_N|RpRirMjYPOiNo7D;_zplIQ&^44sZSc$LquW
z>O+7ye6WebhahqI5Mr)Ago(q42yyrjB@Q2A#Nk7nIDAMFhYuOz@S#8)K2(UqhbnRS
zP$Lc>>crv09C7$CPaHlh5Qh&9;_zXSIDD|5@%r$v`Vb@zA40_8Lzp;xh%i?lqQv1t
zj5vIV6Ne8;;_x9u96l6?!-ooS_)sMdA8N$mL!CH$m?I7!=840H1>*3bK^#6T5{C~>
z;_zXKID7~_>-FL9)rT-~_z)ouAELzJLyWol5GM{FlEmRdhB$mE5Qh&H;_#tL96r>D
z!-qO?_%KHtKFkw`4-3TMLxVVcSR@V~n#AG55^?y@A`Tzg#Nk7S_&=);k!7zZPb!ZR
zhbJ-O@FY$go+O#8CmG`Kq(B^=REWcqDsgyHBMwjM#No*tad<LM9G)x?hbIl<@MMuV
zJZTb#CriZPNsBl<X%mMh9pdn$OB|lW<dX*Gh4&**s}FJFd-OOai9e(DGsFkSm-yiL
z5{Fk+;_#|Q9Dj#aCw|K3UBBmuuTh@V&kkaLH^Wcj=WG1}@e9CLdh>RP@(OX}tx6nu
zs}V=u>co+^IpWCMJaOc0fjIKkAdb8(5=Y*e#9yWRTRq8}hxN*9#F2+OapYl+IPx%0
z9C=tEjyyDoBM*zjk%uO6<Y9?8^3WoV^HTj}Zyv&$hdJWN!#r{1VSzaE&>)UHED}c^
zn#7TZCF00Ki#YPoCXPJJpW@x$>vew@h~qlaAdYb^62~~3%yphF5l4Pn#F3viaU8D>
zaa>2b#Bm+z5yy3;PaM|~%X|QD=)8*Sh>tj~BYxtzjs%F~I${&YbtFg}*O3r$Tt~vh
zaUF>e$8{u1d~h8h{yOy`PW;!DH;8AHFA|48P2%upi8%aeF;{=u#NkheIQ;1nhd({y
z@TX54{#Y;a`T&1?#Nm&hIQ$6^hd(xP_!A@!e?r9JPnbCTi4ccBQR46?MjZaciNl{H
zarl!V4u6`&?^J)5h{K;2aro0F4u3k#)t@eL_|qc}fBMAXkLC0F0DpYM;g6p<{0R_;
zKQ?jr6C@6QLd4-um^l225Qjfe;_xR%9R9?K!=EH^_>&<He_F)ftv<Ag!-o!W_|PQ|
zA9~Eyhdy!mV4dbYzVN|E96tDo!-oKI_+S%<4?*JaAw(QLgo(q42yyrjB@Q2A#Nk7n
zIDAMFhYuOz@S#8)K2(UqhbnQLPin+*KIsr&P=C6_;ZKh^{OJ>iKh`R*54aEX5r;p1
z;_xRx9RAqE;ZKk_{0R|<KVjnVCqf+lM2W+n7;*R$Ck}s-#NkhdIQ%IPhd&kK@TW=~
z{?v%WpC0jt)rUTD_+XvxJ-+b4M;t!*nd|u^KpZ~U#Nk7bID7~ZhYw-m@F7ARK17Mb
zhZu4A5GM{FlEmRdhB$mE5Qh&H;_#tL96r>D!-qO?_%KHtK3FgI9^cQX4?g1X!A~4M
z1c<{2o4NWBBn}@!#Nk7jIDCi@hYwNW@F7MVKE#Q`ha_?MkRc8q3dG?<g*beu5{C~p
z;_#tP96rnuhY$0_;ll!P_|PDZ>&qf>Twj{RkElOO#J{V&Mf~Kp`*SV-8D0<1RvsV@
z4{hS`Fi0F8hM22|VdC&GLL44OiNnJfad;Rf4iA&W;bDe2JS-50hZW-Ruu2>r)`-Ky
zI&pY7M;so`6NiTj#NlCsI6Pb=4iD|sULVHPhahqI5F!pA!o=Z2gt__<B@Q2A#Nk7n
zIDAMFhYuOz@S#8)K2(UqhbnRSP$Lc>>crv09C7$CPaHlh5Qh&9;_zXSIDBXlhYw4{
z;X`PR*N1KDLzp;xh!BSlQR46+#$0`f6Ne8;;_x9u96l6?!-ooS_)sMdA8N$mL!CH$
zm?I7!=840H1>*3bK^#6T5{C~>;_zXKIDBXkhYxMy@FC*&`jAu~qQv1tj5vIV6Ne8;
z=ITR+ID9A&hYuCv@S#c^KGcZAhdOciFh?9d%oB$X3&i0=gE)LxBn}^%#Nopdarn?8
z4j<aY;X{WweCQH~53w`7K9tpmIC1ooB#!N7h*!0r0`Wu2lV^Fi`)=hK;_$FQ93EDP
z!^0|b^{_@99@dG&!#U#caGp3kTp$h)8^qz^B5`=wBn}Ukh{MAcad_A!4i7uT;bE6J
zJnRvNhkfGk!Fq}J_`)9_ad=oD{($;WAr2p^#Nk7YIDDuxS0Cnx!-sj|@L_>Cd}t7d
z4~xX%Lz6gsSRxJ|TEyW)n>c*v5Qh(4;_#tI96t1k!w2hZ@9~8XKH~7fPaHl3h{J~}
z@kiB%8gckgCk`Lxh{K0@=IX-&arn?64j&eY!-pnu_^?DAKD3C#hc<Ee&>;>Vy2Rl_
zk2rkj6NeAhIo{(7AAH2&gP%Bj2oQ%4HgWh6Bn}_y#J{dS%n^qV^Tgr90&)1zV6Hwa
z5{C~>;_zXKIDBXkhYxMy@S#H-K6Hu0haPeG&?gQbtaH7`7e4ri!v{Zc_z)ltA8g|A
zAxIoPgowk3Fmd=WPyGAp!vb;m&>#*U7Ky`$CUf;+i8y>{5r+?L;_#tE96of3!-pPm
z_|PW~AFN;S9$)z2BMu+@#Nk7LIDD{)!-pVo_z)rvAHu}pLxebdh!Ten4dVWPa(_Oy
zNE|*iiNl8_;_#uxTzzO0hYua%@S#f_KJ<vghdy!mU<JI#7e4ri!v{Zc_z)ltA8g|A
zAxIoPgowk3Fmd=0Ar2p+#Nk7XIDCi`hYwBS7pf0S#PN51E#g6~-zJXycZgrE^}EEw
z%3J4okJlHz==#|vzD4VIh{Mk=aroIIj_-Hs6CZp&{e16u;HQr`elOV}z8(8Z9P_J3
z9OLg3$M~(6ddGw9`iNtG`H5rv0pgfnHgU|aAaNYu5OK_}FmcSU2yx7>C~?fM7;((6
zIC0FcByr5I3~|h_0&&c*3USP@DsjxO8gb08I&pk%ZjLxUpW7pTtNPO?o>XqV-0Q=e
zl>3Ndp88+mt)JKW0pf2~ZWFI54-$W;@(}U&C=V0APkDs+1InYsKd3xL{Nu{w#2-~|
zU*O%}FDVZae@c0X_z%EsZ$GDh!96d9iDO<zh+|$yiDO>JnCo>jP8{<(NgVS!LmczE
zKpgYBLLBqDN*wdLMjZ3HP8{=kjyUG^JaNqH1>%_34dR&Bi^MUno5V4%mxyCtw}@k2
zw~1q3cZi>>K143`dUBESDDlgb$B198JWl*N<w@eNML)0fZg)5OA&&2hP7=rWMQ4cP
z`=SfP@qN)1;`qMkD)BQv?ar4P@dy9W<#potzUVpP_`c|Q;`qMk1>*R==mv3oU-Tkz
zd|z~vIKD4>i8#J5x<&lF&**&%aeQBNhxnM*?-CCy?-9rMMfZvC)cV#%ULWGheZ==D
z_Y=qWMF)tNwZ2XKF6BYu_`c{6@%L!`FmZfebc8s*FFH#6e(fhl9N!n6B%aWDks*%j
zU4b~RcNOBe-c^a?dRHTk>s_5Vu6J|9alM--j_ch5aa`{T7kiKUKHc96aa^aW#BrUf
z5yy3^&RnlkbHs6-nkSCy)B<r_ry9g@omwQ0>r|6Cu2W0Iah+-r$91Yr9M`E1aa^am
z#BrVK5yy3^PaM}NYpwTq;X36bj_Z`4IIdFx;<!%P#Bu)_B#!&f5OMsTt4jQUdQ~G1
zuj<6%)f{nnHP2kVS|ARu8pPq%B5`=tBo42Zh{LNEad_1x4zD`I;Z>J7yy_8$SAF8}
z%KAmGC-BNg9A5c}!>a&scx4lZS3%<NDnuM!)rsG&p3D)4C-cPN$pUeB(qOKhEE0z&
zP2%umi8wrI5r-#j;_#$H9G-NE!;>Cyc+w{hPpnJ4#~q&dh{F><ad;9S4o__2@FYka
zo`i_QlQ403GEe*+>cawY_|PB@9~OzjhbD9NVTm|=Xc31GZQ}5uLmWPIiNl8;arn?D
z4j-(b_xQpGA948LCk`J1#NmTY96khz!-o)Y_z)%zA0ouzLzFmtXb}Gc^<j}Xd}tDf
z4@<=1LyNik&?XKaI>g~ampFXr5r+?b;_$(`)O&p4gO50T@Dqm*0pjq%CJrBh#Nk7T
zID7~bhYu0r@F7YZKE#N_hd6Qg&?LT~J}eQ34=v*Gp-mh<beO9TUE=VeM;t!%iNgo$
zGVk$)4?g1X!A~4M1c<{2n>c(35{C~V;_x9%96m&d!-ptw_z)uwAL7K}Ly|ar$PkAQ
zE#mj64{hS`p+g)#bcw@<9&`1fPaHm2zvMl>@WDqMKKO~lhX8T-U=xQALE`WsL>xYZ
ziNl8oarh7=4j*E~;X|A_d`J?94;kX{p+FoyREWce4)G7D4_)H$p+_7(^ohd<>s8+4
zJNSJHarodT4j%%<;e$;aJ_L!whY)f25GD>EBE;cClsJ5d5r+?P;_x9!96n@-!-oQK
z_)sAZAF9ORLyb6m=n?;j`p_p1AFRv0#}_{Mh{Fdzb3H!<h{Fe)ID7~ahYum*@F7eb
zK17JahbVFQ5F-vB;>6)Yk~n<G5Qh&1;_#tD96nTu!-pDi_)sSfALfX|2W!lGd|T>+
zk2rkr6Ne80;_$&{u08~b!-o)Y_z)%zA0ouzLzFmth!KYmapLeHNgO_8h{J~harjUn
z4j-z-;X{o$e5ezL4|Bxf!#r{Lus|F>_^<H#@VNRAAPygF;_x9z96p4Ys}Et~@F7AR
zK17MbhZu4A5GM{FlEmRdhB$mE5Qh&H;_#tL96r>D!-qO?_%KHtKFkw`4-3TMLxVVc
zSR@V~?2y-oBkDttID7~ZhYw-m@FBuneTWi=4>98KAx<1VB#Fa^3~~5SAPyfY#Nk7g
zIDDuPhYxk)@L`TPe3&N=9~OwihX!%@ut*#}G>OB9CF1ZQbfwpa@2U@B;_x9t96m&e
z!-p7i^&w6iJ|v05hYWG}P#_K;D#YPKl{kE;5r+?T;_zXPIDD8V4j&eX!-ocO_^?PE
zJ~WBLhb7|hp+y`%w28xq$U3hNEA{WqqQv1tj5vIV6Ne8;=ITR+ID9A&hYuCv@S#c^
zKGcZAhdOciFh?9d%oB$X3&i0=gE)LxBn}^%#Nopdarn?84j<aY;X{WweCQH~53#Gf
zKAfRG#EHX)BysqVAr2o3%+-eqarjUr4j*d7;X|D`e3&B+ALfa}hXvyBp+OuzEE0zg
zP2%uji8y>{5r+?L;_#tE96of3!-pPm_|PW~ACl|6J_OW<3~~5SAPyfY#Nk7gx%yBe
z4j<~o;lmtp_%Kf#J}eN24-Mk*VUaj|XcC7HOT^(ri#U8}6Ne8S;_#tM96t1j!-qa`
z_+V}D9$)z2BMu)5#4lDKD#YPKl{kE;5r+?T=IX;7ariJ#96l@%hYt<n@L`cSd}tDf
z4@<=1LyI_kXcLDI9pdnzOB_D*h{K0Iarj_e?LEHm!ABfE_=&@Z0CD(GCB8v@s1b(`
zb>i@0jyQanXRbah5Qh&9;_zXSIDBXlhYw4{;X{i!d}tGg4;|w0p-UV-^oYZUK5_V9
z{j&G?!UrF5_~0iF9|FYTgH0Sh1c}3kI`K{F!yIw=Fi#vlED(ne4d&{@B60Z8Bn}^z
zh{J~#arn?C4j($i;X{`=eCQE}4}IeB!3ukiFMRM3hYx<@@F74PKG?+JLy$Op2oZ-5
zVdC&%p7>7nVSzY&Xb^`Fi^SnWlezk^L>xY}h{K0Aarn?74j;P2;X{u&eCQL057sr_
z;|m{r#NmUVID7~YhYvP!_z)xxA40_8Lzp;xh!BSlQR48SLHsuLVUaj|XcC7HOT^(r
zi@EyHCJrAu#Nk7iIDF_4hYx+?@WHy)dwk)8k2rkr6Ne80;_$&H4j+QV;X{Zxd<YYV
z4-w+<Axa!R#E8R(IC1#UB%V<pmWacL7IFB{CJrAu%+-f3arn?94j=l&;e&OZ_xQpG
zA948LCk`J1#NmTY96khz!-o)Y_z)%zA0ouzLzFmth!KYmapLeHNgO_8h{J~#@jKLq
zHgWjSAr2q9#Nk7ax%$v24j-)Ry~h_m_=v*?KXLdFAPygF;_x9z96p4I!-p_&_z)ou
zAELzJLyS0lh!ckoN#gJ!LmWO7h{J~parn?7ey{q_B@Q2Y#Nk7qIDD`+dXMkm?;nW6
z2S0K65Fid8Y~t`CNE|+dh{K04arh7+4j-b#;X{l#e25c=4@u(iAwwKK6o|uz3UT;Q
zB@Q2I#Nk7a`0uI@ed6%J`W5f-g%3XB@WIbq&kq6O@WCbyAA-c;Lx?zh2or}75#sP6
zN*q4Kh{K0CarlrV4j(ea;X{Eqe5eqI4^`sup++1&)QQ7~IpXladbRiXzE6Gd5r+?c
z;_x9r96s30)rTN)_z)rvAHu}pLxebdh!TenG2-wcP8>cYiNl8sarjUm4j(GS;X{=;
ze5etJ4|U@3VU9R_m?sV&7Kp<Kf5hv<pQ;Z5;_$&H4j+QV;X{bI`Vb}#A0ouzLzFmt
zh!KYmapLeHNgO_8h{J~harjUn4j-z-;X{o$e5ezL4|Bxf!#r{Lus|F>G>F58MdI+m
z-sJV+uhoYjarh7-4j;nA;X{PE`Vb`!A7aGeL!3B#ND_w+8RGDvKpZ|)h{J~}arjUp
z4j<~o;lmtp_%Kf#J}eN24-Mk*VUaj|XcC7HOT^(r=mxJ3A5|a1#Nk7PIDCi_hYvC4
z>O-73d`J?94;kX{p+FoyREWceDslKwBMu+x#NopnariJ#96l@%hYt<n@L`cSd}tDf
z4@<=1LyI_kXcLDIk<DHoKB+!Li6=hmex5zH#asVztsf`;ZRN?W-un3ba)$V_)-MoW
z^(A+^72+>ZUM2o=<u&4$C@<XT9Zv}T6Nmp*;_$yl9RAmttN(Mv;r~2w_`g6L{x^uj
z|3%{PzeybaFA<0TE#mOMO&tDrh{OLbaroaO4*&ba;lH)bd%WPkk2w7I6Nmo+;_$yp
z{5tqU96r>E!-qNI@L`_0`mjJ8J~W8KhehJ>p-CJ*ED?tfE#mN@O&mUSh{K02arn?9
z4j=l&;e!?R9$)z2BMu+@#Nk7LIDD{)!-pVo_)sUlO?{XnKB?!!dE&Qd{RQIZ>2Yrm
zf4$aUBo42d#0Oq&_vS5y?GlH#4dUp3kvRHqGS~4h5r?-e;_$Xj9Nu<_!`m)#c-tcm
zZ~MgIt@Rpjey-Bv<s**odGiy$R_h0dZ&Gd(->y7J{B_Dh#NVJiOdMWCi0{+-QQ~JS
zj}b2^j}yOBd6IZdd4~9$@&@rKcuRand6W1(%9n^ALjBizkJmd;pE!O#S%`V-qujjN
z<)_MR!%6O~gY)mw5I<#zw}$v>L%co2*9`H_5I<{(S9Lt5@0Sd5OZkh0xg&E|`iA(*
zq5A$Ie)13x3~{q>c-ceTYy+?05XUo{&AQMKUxmsN!$bV^As!jxFCOC2A%4aXj}7tF
zLp(mj{X;xC#Lpb!qs37MMj04oV3dJT21XeeWnh$nQ3ggC7-e9TfuAe`(YwFliyk`V
z<L|Jn=x;U4E0<f*yFcdp=%BOZ4Sz4Ime>9d`FG6)5m_?jj9GuAvn;Q*A2DU~7WPQX
zDL-V&=B?|Irc-{vl+9c2BMqngXQpi4!XBA-%I`O2^Y-{i-6{WpDVw*lN2*Twou+Kw
z!X7C&<+qu#dFy&4>6G7U%H}Qlk(g7y!<5Zi*dr0ATr_3#*7ZopDZj~-&0E$Zwp0GX
z+bwIdE@kJn=~p+se$%a+cHg>7G82p*IyX97zStj~J!f2+-Mzf>&X+%I_BDEV!=9H~
z)|JifYo!whbyKnwJ+wt?AKvg+@-u+wp^C42y@9hImE5km=Wk`Nqq84==vwLEL#CH2
zzq#Y^hBrw+I}Weiyw<Wd&3<#o?7#0gyz#f?Epqol)AaDhx5@hG>_0uU0j;_|S+Qew
z;$oky-7PiEaq0e*q;S*SA78O!_WRM<FUw}4vmf6&+jY0o+<w=`{IbRFKR;`R^RL~l
z61N}vyNi3K{9DU2%^inKf3uH2Y%;g$gJyKyq*;0R@&{jO2DovJY{}W`nhUmA)@u&^
z#y7>7<#Moljci#qDI1e*Z=d~`Y+JY5%}EbCW}obS?ityf3~u|~&%A8rV-Lz6xe1VF
zS2m-E&v{IKRN#!Dd%fAF?Cq}%svO?fJYNR$5?x`;a=MqD|Ee_WeqZ`FEzTKQ@rP!`
zSImmT8~>{;oAm*#?=HD3&XW~SU9{$cs;rP2|B4)4Y6g2WH|3q(Uv&=LBa+eX^|JEr
z{wcrtsp#CHbAI`-G(5cJl<!~TpZU<5zuoXlQl7k~aPrLk(b*%iTU(t?%Pu+t?k1(}
zQ@?M<vIb+3@vS$N4qv`eLU+uJOx7EnWkhbX^AwsRgFQE6?^?&I{nY<)yD&2hwZA92
zcR#Hw<S-6aeATRYP*)6Rwry6-n-zyQeoB_jpv;_eNA%lHi!XOCkSHgO>$|7Q!l7{Y
zj|@MwG59-!Eu6FEJj>cX+jQr}p{P%0%p0WE!)AEhE1df7jJZp4A~VL{T_te)?8luM
z^I`GOGh<|WbWf8CJ7$kOv<>5tF`gouktBZ0mR^tiK-M36i*?g)Ew8!fi&AOF>~i<r
zvUky0IfNhW9+dUbLvQgJdw*lo-(BL=_>GGLo1C57bm_;on7Os}$|r7`ee$U@)sO9W
zKO*)n+vPWQw@X)BW}h}^&K+XStkmzx!M<P4F-{Nv*tu!;E0VC-7C9LNWQQ|mKr#zj
z-(OyqdGm}vew&$d|IN(PyFTvB%fFFTN4Dt+B(Qz9FC88fW6eB1wDI$A)2^knmr7?3
zo6}|Y+tNmMJtW&T={S0Ly0St>)~!l(4$od$ki%m+JHBxiy~#HI_!m5f$1fXrNGd;U
z&cEHdEM2+Wot4nN*zMwj5@bL(4t4R?b3I*{BYZ$=J#2L9-elT#pLRBXmuY+WoLjv0
z{YPFZyFIz)f_tRCIVEnNee8|V*{??LexeuKz4glGm5<6!rlsxjIcsJu>pPQUsB6x9
zvhG2l(Zi=aV8S*1@;P!j`&LbsW!Fzz?&$IRDCYU$jk8Yu4R1B!(r2S`@IL&lS4ThG
zKPkH66Vb<>EuSqNd{8^^Eq7#STPzFhs~cY@?W~zAZjIi(@jpu~IYeJ8zgTkeO5xqV
zkX`L&WUQY!<x(lH_;s{HJo09<w}V}%JJ%Us_f~0V27G;V&4&VVp1J3X<x9HtzDc&6
zmAgk996I|w(Yu=~YO!Tg^PVrxe9K&~rOJQ$^^eLR{=$s=&>0^v;qW;Z`OGECowNT-
zI(yh$hPw~SlI2|YrRC8py{sC&Mj04oV3dJT21XeeW#H!`11qeR*4omwww<3T+xba*
zIz62)9$aOW(&ID5Z26#d(L{POn@iifwr<|MW7Do(iM5wsbxp}$yKZ9D@Vf0gx9{Gy
zearCLRYOg&dgrY-*=wccX?ybdDZ7*(-)C>MCsR|U^mVI*mJVdg;~6`+FI~(f%Hvb1
zQt49r&9Y)VRT>^b=~`i@Y4dV%Mp{dGPcfa^XO?8hshO#=sq_}pZ82S*DdwEb$?CVP
zvQ}M`&P`+|kM6<RQbLA)S;-Dn)`r5BTUS|T2<a(nCO4JZlb%XXSlL{*Y^8Ds6K1pN
zvXx7hvy<lE>C#?nBE3I1Gc{$6=cfxZ<#Zyof3P?%qbjEdMM?0UbkQ2mq{sIq#`C#y
zx>B~L@~MeLAwQKJKRBpZOgmd1lxIrPtduUNbCdbvc-kzQB$_A(tB@+Drp>a`aWQ>+
zp_ngQvgtv;`%_cd2^mSETukLkmaHF7mr9m&w$CbM_l{?#CQRIymEDk7D(4F(I=f|J
zYRcLxo09&e9u^FdUb6X|Sy|Ydb=Emi%Ht{NLPI81Oq)8Xa@pODN_yO^oGfXAa`}|m
zkr}Bbo6F`hW^HjQJ1uE<gJBZ~bI$m)WwTAw`Ao5x&XujS^e4G>VyT=O-{&l4((VxU
zr4Q<6q=`9zT9N_jCNv-GtX!_(rpVnyE?=6-P7IT5VLXwQgJ6{omZtK16DG-6p3V;z
z<=B?;Q)x@k%v4%x-9D2pl~FWD+>-pH_GG8bG#k(459F+g)OaGlKV2+lC(=&AIe?{f
zsyLpJBXuAr3QWo#xM|jG%kWGh3MSiA+1yMek)58NDW~?x#zoJ>o@{O+A@c0ciZu=>
z%Eo0zISwaM6Vut8!;7f{mf5)}$&a((ZcuhJ;qHXl%Y@X*I{Tg&&lK|_nSlj4D8@;%
zm>M5<q1&@zBC3mWW(-a*=~8~i(O6cOv(qjumk*jbF`3<KP4CM|E~nBm@vYR%M7Eqb
zAm<}2n~76&<)1YBJ(-<KOSfeapeSds84+S8=SH1Jlf}GQ=gg^c-jXv*emrX?=It~2
za>^VxXF-Ip4y0rjS=sz}*_mCAv8h5qP8rs;Y)eiF+1x(qE0>aC8}DSsyRAh@XQL9`
zF-tDYB+E-<*HGDx)4gQW4ANaL?a>7}{bdq`ba7hC=J1!Wi9{~1b60#X9xUXu(m`rs
zBH@g~?DqaNmgRI{CPi_2A}hvBSaR~PrgHldCQss8(OuXxrNqn#7fog-^X}4AR?;c^
zA$w3Lo0*%JAf7oPGgF>0!WPrx`<={|Bs-E;GZaTSt2|y9oGJ4Y3Z2z*>~d*QWM*Pe
z!#Fi4WOI9qCTVFmh_NQ$=uGsIvoA_Audz6j%T^MlvZQPpNVg>8a#l|4m2)K4l|+>Z
znUKZ&L3gb)jh%X~Xl9Sh{-v|~WfGV6-fpFHGKFWv)XcP*9%jxsBP+_aGmU<8Q&QJG
zC%B7lZ>TG2naHO04r*%`xk+;-IeKMr`mIbB2g61CQd(BY6sM}$(s(J$&{YsMbzD=a
zgVxb87K+*ZS~PRdX))NZ$?P6E7v%TN3=R!Op^42Gd6QB>6f38HXvIXXI5=pSdpLvZ
zMNqDP)6UGpR`%uhyh%h!%%t~=%)F_+X*nBa#~GVMI9G^5{^%CbV{u|S#SO+~DwXq~
zGS8>vA|(?;^XB^Ih;FWx2U5ieIr|S5<<MLEv*`oY#Q0I+<vL_8zee(DIb$U5NEh=~
zA#E=EgERg*IhPEb>`h^Cz7!Ya1h7wTBc@VvF4$vE$cfTS$vFkNN7b=a&J>f=a<17^
zm~?L_oO6Q6Cl_zIjJmflgHykp0}`py(d&zQ33MiVNmf|pnH+XnPI;;EOv2UOTt8&!
z*=gBaDW7w0W>V%RL8hTO56cZqVtQ}Ua<V?@?qtH;t;k)5?C&}(m&VHjk+S2{gXL6Z
z-3DwpTNqYbPT5$IDo+>o4-R%QFZ-egXd*o>hZiNOR~o7}QNoIBabj@j3xz^{u$`%?
zLAT?nK?cg^La1BVGc_Zpy}X>76=Y||_hY$ODITqvElmu1C}i?O<I1Mf>2PS>l^fQh
z;dm-LiRIMP(aAqK*zSI_IaJv<KFG!GGPYuQFvUvKgBEgIBDbk~QgXpB$f;58SqEh~
zo0%CQ*5y)rC3!P@WOU|wme5-vv*=i57TlATNW53h0CK0|EEQ*RR^{kPdwS@28WR)F
zG_}SHGe!luYPc7uTs|igMlJzzMNP<iiriicw>W0ga(ye^E>mCbou{4aRMEMHm1N$f
z<sH~eNk)6Ey|!de%DaJmV;MWO-?>6&ox9G`nDeVX%i5funVPV3d3h5fv&5;NFWP1|
zWP|edW^Kv*`oyxXxXj+2$x3f>A+#l?X3BXv>tx5p=!3R%P7%Q;#%y~_-svtaw;p5m
zWmmY(t&6ggG8-oC#ICJ7w(q<(p14uo6umYPO$^@c{O}snbd}tb%2w>Gxp_$2?%N-I
zkCn*6(uDJtCpdHE)%NgPAnD-f{*`8&dx}YUyK&GSpOTp%J2%+F9~$4L*C%3IZ@Fpv
zu3g)2-nr|T1O4GDtE}Dmye+C6w9Va!=GrdjC2wWf2U$qvZ1?8I-Y@4l$#tr12iKNf
zC4;$iY?Za*r6*mtUiRDkref(U%ga@X3twGcUXZvb@f#Au-R0$&#Nd(T<)%dQI@=A_
ziaTzxR#f~eUi#t}`RXfP<Tw0f@_*;a<>gmu9TSdS-?A>2b*6qqw%w6vt@huzI`En`
zryuZDtyiCa-DOu@>~v$=%a8Z1zSNVSmJeP*S^72kKPh88VCbgR{(Dz$S{=Ciq%EuM
zm627e16x-6H?8(<I^DWnR@dbJk4YZ~%nskM+W(G~H>?i4{iGXK+lNoSVRi6#PT90N
zbnmH~R)_C?(U#T8lkAgMXWqEFaNFw4rqxMlF?x+MFv`Fv1EUQ5lo=>I;&u?yi0<$?
zyFkk&wtj96>rek2mfU!muD?R#H_RUr$m?1yM>OvJlFNVA|8CRmV}EYe@@*Qk8Y>#_
z)%Z@0@7MU38Xwa55sjbJ__)S@(fG68&mVVtPwO~W=yT<R`W*RFwY*y6xf)-g@fS62
z`;5EA%e8#9##d{6jmBFv-lj37F{2Tmf84L--_-avjlZYyeH#BnV?*PIG(M{FF^yl;
z_=Lu9Yh2dI`8rMa|16CcYUFx9FXeyG<9n6n;d+fxjk`4N(RjPY7ixaqqWf3V_;!u&
z)%fQcAJBNFdiJ1}n;IX{_$iHF*4WedtVW;a->>mJjTdPg(-_vcS>sNPagBR4?$cP-
zc$dbvX?%~yKhpTe8t>Qmutpr;N3?uUKi~RkEq_5H<~2T-jL#wWb^S_hce+M=4j28O
zXc5;%d|n)%569=h@wr>%1)uN6=eapg_<S}#kB!e?<MY<|d^J8#jn7Zx^V0ZyG(HcF
z&p#v2_<S=y&y3G6<MYb*>2diRjX&=3=xDnaN<NypKaXnstj7CvzCNzy<K^iIUH{~d
zsCT^fFEsv?=HZ3n$4`5FzB9^)Cxy?9^5MzHsrtTaG+*4b_(%Eh_)*gL-OraF_}(IX
zPZ7SC2;W15?;XPT4B>l)@I6BK-XMHWkZ17EL*Wh2vmeiOJ|=X$IgQVCO5S{e=Ht1}
z1Nysnln<oBiTL>(&sR^SpQ8ePe)%r-<h{?+ygX4-`u-DazC7P~y?>MsPl_KN<-?PY
zzdyn9@y|M5&rUptg(pw2d_0G{e$1|()_maSg3oA~e?G`RN5s!hPx-vt?HY~8+{g(8
zH|y`C&e#39;03Z@Kj|U3MEC!>^5az}cs|!Z$N9j|-G9>K^B>(V{2b@wKYD+D+U;Mj
z`8ZL2{_#2;@2_e6Y4@MoXGZz(q~y*MEFYf1KM#d>pJ4fTzURxr36_uNK?zTn50CQU
z$;W3-uzc`*IilnLuEvwwp56Pg3ukNl6XkmQWAEUP*<fsx4^PInjq>5iM{<-8Pd>_;
z5By#bzjvrAKcvyq=mlJO_s9>=(S2Z)4^KWG9p%H5kFSsN;mODMNBQvN!~aiiaddy`
z=HWu*L+}5CTE^dHU#{h_Mo%(dfQ2vU-%D-L{pH`Y;O~0z_tyA(ZTvj`g_e@-nuil*
zzC7Q5FLkTtBl&YOAAXe2Z_+&Ayz!&zztGp`H6L%*Skw4UjqlNTpT-9?eo!Nx<NBzU
zPxO3!N%!L^jX!vS?AK3vTu%Rj+ww#?pPsAZ{Ym%tN42|1`@c-%)f%tU_*#uWs-G9i
z`rW#}qt6q%M>L`N7@beueC*SFoG9lHz8}NS_nxcydf)`j$8)v++|S)7SU!0C&vp47
zqkMQ$`Uj(Yc=E9@%7-T(_l)x4$;Ssq`S9f9BcptH^3fXQ!;_E4NBQvN<H#r<o_u_F
zln+lnR*pWO?a9X(qkMSs5g6selaGr>`S9dp!zdq~d~6!!!;_DlqkMSsaoZ>#o_u6R
z`S9f9j!`~5`M7tK4^KXRca#rLKHfLVhbJF@I?9J9AAdc{hbJE&9p%H5k56ho5~J@w
zb@KAK=HuHMJ<0kxEa30qmNg%%^mzd<(fD$Wm;47kN8o3DNJ6@wC(7S{Jm2}bZj=vC
zinnP#@crJmXc^D%c)gbS@7g_`ynqU+=h=^+_4lv;w1;L&^D(3G9*u`I{<Qs{Xzkyj
z`~5=w{KkoPJpO+h?+P8zeHvek0XE%*Gql{-=Ovw|<<=AK!b`P`XXBl(<+iSW8Lj{B
zv&(sd=1y`~n8)Fn*Ss#A>K3i0uGhyZTDA`KbX_0Nc#cK{ZDw_Eb{nkJ6=7|Eww7O{
z@fS2!?spf!!@7X|iu{?o{xq$R?f+oe-5<eD)n~Qb)g4*ZGV<Kha#Pnot>v?@VeS8C
z9XdB*gx)P*-E_-0YJ9!MNsaq8-mCHVHU5dl4`}>^#?NW|y2kHmwDeVox#QS%^XAuX
z-ED2#y!l!?xJ@4KE6<y}@~W|QV^<~$>q6_+%cD|5cHzqPmnyXZ(a~|D>HTEu+snLq
zwSbk@YODH)yXJsaFvod?b&l0hpF7IO9Dbfv(eVx+zGL;j&Z&Q{#d*HY>F-P{{<wCm
zVW(3+Fz^^_CuHF)6o%qkkHg<}9Nst%|DVEb`JZ3cNaAekJS(I7i}goj;Y{m%tF9dL
z<==$+rT(2%|7qdJ8vl!>|Fc%Ev2dP2|7RJF!cbg$9RAC~&31X5TZG#~?XkE^_?auu
zv)~<g(s1d?YPsZOU6B{=m;Ifi{n{w}Eb*uQ3AZAiL0WeDImc>#&E<B+jdy83RqE%k
z_T&GsThDo3s4TwI;Vv8g_s@^RKX@Ge$>Z>^2se4)@qF_*^<N|>R(p6Lx_qJVV;!&c
z$Kl(Bzw#LUOqlvB<!s;du6&bl;~T~a-{b-GxlB4G527xOja%}3>zRT)nYJQNIF={;
zrlu05>_oym#k=f0V7N3EvJzX~uyfN*+c#T@#I~KcCbmWsH{P=8rmczSmRqdEwjDR$
zuxUr)<{NL^wRLx5_of?mY)weT@$$h!TKaK1mnR>ON4D+Qe#7R(y0NRQ$znP^D6SjZ
zAl0X`d*so}<&->Vd^(lQ4c1<@URI`y#a!O>A<zBIA6@6P+`jv!gzQRsZ?+`QO5c6c
z<|%nr^zPK2skHN?<y>kyZ6(qZsdCDibRO|L=-O#+9_lPPC{Crl{Z1BU%14Lnj?r}D
zj#0+6SDqHG12(%mQOYMW@~~*xY-0P(vSK2eGf(TDkd5ai<N@E~&hv<6=L%ALPe~qA
zY<5<j8ZA#iPbB2w@wxJ3Vmz~NFubeA*2|;O<q6CKVn&g!WXm2f={#TBgAxfzLt^XB
zEeSKS$>a2$Fqu1M-Xt9pGX+nVg5)6NiykNkxssBHp660i**oZf8kr=oa%8-1tg+I;
z>2hk1#B$M%87xT((#3)`CXb#@kL}IPjLD<2#mw?SMVoHeet9{y*K*cnQl*SFHgPZ~
z{kgGRbl2<`u|*S4AtCFE>8X@ypi70RvNh%mb4(V;_U5H%9>8vmIZlif^NuuQ=}cm>
zC>A9$6VeGvZqr1nSWL<AObaZVR%Tn$fiteubaq_Ek}sRh2r(v(i2&Bvcz$|XF4x{!
z`;(W<{iU3O<l5}M0(ygoi2F-j?bOAxi5JQL=Gu(*VQr5Xk#)nb!TCht)e?u>SF}B%
zJ=|E9ah(9(A`9P>dpvA^PTL~}wIc3=dHd!*z})kqJ?@JUJ0>~uGPjdv5$$n*`g&nz
zlW33oY(!hzOBLruMeB`1%{?sI<GvM<$B%l5`=q_OCq^0f#fUL?vp+O`#nv6NU~-D~
zxGzV<eL3e3`+twNzd<|1eLf=I7nt^@4d?Ie!c0!|cHJ6WkD3DIh4&9`>4%U1tgIMr
zZ#UhVh#j{$_*d>YoEPG|r8L~$`mk2ei18{Nj34|DsXgv15T6|EnA-!ki~jx#wa5J#
zBHj<7KKuWt+8)Ob_fv@R)oAHPj2qOvEi?O%_P9?*T#t$xdHWAj`?}s|Blf91Z~swQ
zZ-%JT*24Q<M6QoAqWQJYnxXc1UyO))QpI_pjQACyX8*DM!TXRW-1=5lqXyesw7w-}
zbBG7+^?e@RhjdKm@`47~E<Bi%b;H|l>icTMpws&3KWLBm@6;ae%Mq(bn>tHqhiEoF
zynWo)Bl7$kw9|f1mBxl)|8d`l_a!(_U>G(QG@h>84-rTEtB9Min%8iIyY3qI-$lLe
v@9Dr^F4)W^j347N3}bg&y8kZhcDL{r>Kxle-7}BVe(kTiHP5C7RJ8stqpf*V


From d2a5f10bf1f123f6d9a7fb4be4cd92f6e95c129d Mon Sep 17 00:00:00 2001
From: David Wang <00107082@163.com>
Date: Sat, 30 Nov 2024 21:49:09 +0800
Subject: [PATCH 307/368] sh: irq: Use seq_put_decimal_ull_width() for decimal
 values

On a system with n CPUs and m interrupts, there will be n*m decimal
values yielded via seq_printf(.."%10u "..), which incurs a significant cost
parsing the format string and is less efficient than
seq_put_decimal_ull_width().  Stress-reading /proc/interrupts shows a ~30%
performance improvement with this patch.

Signed-off-by: David Wang <00107082@163.com>
Reviewed-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
---
 arch/sh/kernel/irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 4e6835de54cf8..9022d8af9d686 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -43,9 +43,9 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	int j;
 
-	seq_printf(p, "%*s: ", prec, "NMI");
+	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat.__nmi_count, j));
+		seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat.__nmi_count, j), 10);
 	seq_printf(p, "  Non-maskable interrupts\n");
 
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));

From 21bcc49974c2a45c6c5e8e5e500ce6642e4328f1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Dec 2024 09:32:07 +0900
Subject: [PATCH 308/368] sh: Migrate to the generic rule for built-in DTB

Commit 654102df2ac2 ("kbuild: add generic support for built-in
boot DTBs") introduced generic support for built-in DTBs.

Select GENERIC_BUILTIN_DTB when built-in DTB support is enabled.

To keep consistency across architectures, this commit also renames
CONFIG_USE_BUILTIN_DTB to CONFIG_BUILTIN_DTB, and
CONFIG_BUILTIN_DTB_SOURCE to CONFIG_BUILTIN_DTB_NAME.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
---
 arch/sh/Kbuild            | 1 -
 arch/sh/Kconfig           | 7 ++++---
 arch/sh/boot/dts/Makefile | 2 +-
 arch/sh/kernel/setup.c    | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/sh/Kbuild b/arch/sh/Kbuild
index 056efec72c2a0..0da6c6d6821ad 100644
--- a/arch/sh/Kbuild
+++ b/arch/sh/Kbuild
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y				+= kernel/ mm/ boards/
 obj-$(CONFIG_SH_FPU_EMU)	+= math-emu/
-obj-$(CONFIG_USE_BUILTIN_DTB)	+= boot/dts/
 
 obj-$(CONFIG_HD6446X_SERIES)	+= cchips/hd6446x/
 
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 04ff5fb9242ed..89185af7bcc98 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -648,10 +648,11 @@ endmenu
 
 menu "Boot options"
 
-config USE_BUILTIN_DTB
+config BUILTIN_DTB
 	bool "Use builtin DTB"
 	default n
 	depends on SH_DEVICE_TREE
+	select GENERIC_BUILTIN_DTB
 	help
 	  Link a device tree blob for particular hardware into the kernel,
 	  suppressing use of the DTB pointer provided by the bootloader.
@@ -659,10 +660,10 @@ config USE_BUILTIN_DTB
 	  not capable of providing a DTB to the kernel, or for experimental
 	  hardware without stable device tree bindings.
 
-config BUILTIN_DTB_SOURCE
+config BUILTIN_DTB_NAME
 	string "Source file for builtin DTB"
 	default ""
-	depends on USE_BUILTIN_DTB
+	depends on BUILTIN_DTB
 	help
 	  Base name (without suffix, relative to arch/sh/boot/dts) for the
 	  a DTS file that will be used to produce the DTB linked into the
diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile
index 4a6dec9714a9e..d109978a5eb9c 100644
--- a/arch/sh/boot/dts/Makefile
+++ b/arch/sh/boot/dts/Makefile
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_USE_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE))
+obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_NAME))
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index f2b6f16a46b85..039a51291002b 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -249,7 +249,7 @@ void __ref sh_fdt_init(phys_addr_t dt_phys)
 	/* Avoid calling an __init function on secondary cpus. */
 	if (done) return;
 
-#ifdef CONFIG_USE_BUILTIN_DTB
+#ifdef CONFIG_BUILTIN_DTB
 	dt_virt = __dtb_start;
 #else
 	dt_virt = phys_to_virt(dt_phys);
@@ -323,7 +323,7 @@ void __init setup_arch(char **cmdline_p)
 	sh_early_platform_driver_probe("earlyprintk", 1, 1);
 
 #ifdef CONFIG_OF_EARLY_FLATTREE
-#ifdef CONFIG_USE_BUILTIN_DTB
+#ifdef CONFIG_BUILTIN_DTB
 	unflatten_and_copy_device_tree();
 #else
 	unflatten_device_tree();

From 909f3c55d887a9f9d4cd2762813cbfcaf640ec57 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 24 Jan 2025 09:39:19 +0100
Subject: [PATCH 309/368] sh: boards: Use imply to enable hardware with complex
 dependencies

If CONFIG_I2C=n:

    WARNING: unmet direct dependencies detected for SND_SOC_AK4642
      Depends on [n]: SOUND [=y] && SND [=y] && SND_SOC [=y] && I2C [=n]
      Selected by [y]:
      - SH_7724_SOLUTION_ENGINE [=y] && CPU_SUBTYPE_SH7724 [=y] && SND_SIMPLE_CARD [=y]

    WARNING: unmet direct dependencies detected for SND_SOC_DA7210
      Depends on [n]: SOUND [=y] && SND [=y] && SND_SOC [=y] && SND_SOC_I2C_AND_SPI [=n]
      Selected by [y]:
      - SH_ECOVEC [=y] && CPU_SUBTYPE_SH7724 [=y] && SND_SIMPLE_CARD [=y]

Fix this by replacing select by imply, instead of adding a dependency on
I2C.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501240836.OvXqmANX-lkp@intel.com/
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
---
 arch/sh/boards/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 109bec4dad94a..1af93be61b1ff 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -80,8 +80,8 @@ config SH_7724_SOLUTION_ENGINE
 	select SOLUTION_ENGINE
 	depends on CPU_SUBTYPE_SH7724
 	select GPIOLIB
-	select SND_SOC_AK4642 if SND_SIMPLE_CARD
 	select REGULATOR_FIXED_VOLTAGE if REGULATOR
+	imply SND_SOC_AK4642 if SND_SIMPLE_CARD
 	help
 	  Select 7724 SolutionEngine if configuring for a Hitachi SH7724
 	  evaluation board.
@@ -259,8 +259,8 @@ config SH_ECOVEC
 	bool "EcoVec"
 	depends on CPU_SUBTYPE_SH7724
 	select GPIOLIB
-	select SND_SOC_DA7210 if SND_SIMPLE_CARD
 	select REGULATOR_FIXED_VOLTAGE if REGULATOR
+	imply SND_SOC_DA7210 if SND_SIMPLE_CARD
 	help
 	  Renesas "R0P7724LC0011/21RL (EcoVec)" support.
 

From 1c7b17cf0594f33c898004ac1b5576c032f266e2 Mon Sep 17 00:00:00 2001
From: liuye <liuye@kylinos.cn>
Date: Tue, 19 Nov 2024 14:08:42 +0800
Subject: [PATCH 310/368] mm/vmscan: fix hard LOCKUP in function
 isolate_lru_folios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the following hard lockup in isolate_lru_folios() during memory
reclaim.  If the LRU mostly contains ineligible folios, this may trigger
the watchdog.

watchdog: Watchdog detected hard LOCKUP on cpu 173
RIP: 0010:native_queued_spin_lock_slowpath+0x255/0x2a0
Call Trace:
	_raw_spin_lock_irqsave+0x31/0x40
	folio_lruvec_lock_irqsave+0x5f/0x90
	folio_batch_move_lru+0x91/0x150
	lru_add_drain_per_cpu+0x1c/0x40
	process_one_work+0x17d/0x350
	worker_thread+0x27b/0x3a0
	kthread+0xe8/0x120
	ret_from_fork+0x34/0x50
	ret_from_fork_asm+0x1b/0x30

lruvec->lru_lock owner:

PID: 2865     TASK: ffff888139214d40  CPU: 40   COMMAND: "kswapd0"
 #0 [fffffe0000945e60] crash_nmi_callback at ffffffffa567a555
 #1 [fffffe0000945e68] nmi_handle at ffffffffa563b171
 #2 [fffffe0000945eb0] default_do_nmi at ffffffffa6575920
 #3 [fffffe0000945ed0] exc_nmi at ffffffffa6575af4
 #4 [fffffe0000945ef0] end_repeat_nmi at ffffffffa6601dde
    [exception RIP: isolate_lru_folios+403]
    RIP: ffffffffa597df53  RSP: ffffc90006fb7c28  RFLAGS: 00000002
    RAX: 0000000000000001  RBX: ffffc90006fb7c60  RCX: ffffea04a2196f88
    RDX: ffffc90006fb7c60  RSI: ffffc90006fb7c60  RDI: ffffea04a2197048
    RBP: ffff88812cbd3010   R8: ffffea04a2197008   R9: 0000000000000001
    R10: 0000000000000000  R11: 0000000000000001  R12: ffffea04a2197008
    R13: ffffea04a2197048  R14: ffffc90006fb7de8  R15: 0000000003e3e937
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    <NMI exception stack>
 #5 [ffffc90006fb7c28] isolate_lru_folios at ffffffffa597df53
 #6 [ffffc90006fb7cf8] shrink_active_list at ffffffffa597f788
 #7 [ffffc90006fb7da8] balance_pgdat at ffffffffa5986db0
 #8 [ffffc90006fb7ec0] kswapd at ffffffffa5987354
 #9 [ffffc90006fb7ef8] kthread at ffffffffa5748238
crash>

Scenario:
User processes are requesting a large amount of memory and keeping the
pages active.  Then a module continuously requests memory from the
ZONE_DMA32 area.  Memory reclaim is triggered because the ZONE_DMA32
watermark is reached.  However, the pages in the LRU (active_anon) list
are mostly from the ZONE_NORMAL area.

Reproduce:
Terminal 1: Continuously increase the number of active (anon) pages.
mkdir /tmp/memory
mount -t tmpfs -o size=1024000M tmpfs /tmp/memory
dd if=/dev/zero of=/tmp/memory/block bs=4M
tail /tmp/memory/block

Terminal 2:
vmstat -a 1
active will increase.
procs ---memory--- ---swap-- ---io---- -system-- ---cpu--- ...
 r  b   swpd   free  inact active   si   so    bi    bo
 1  0   0 1445623076 45898836 83646008    0    0     0
 1  0   0 1445623076 43450228 86094616    0    0     0
 1  0   0 1445623076 41003480 88541364    0    0     0
 1  0   0 1445623076 38557088 90987756    0    0     0
 1  0   0 1445623076 36109688 93435156    0    0     0
 1  0   0 1445619552 33663256 95881632    0    0     0
 1  0   0 1445619804 31217140 98327792    0    0     0
 1  0   0 1445619804 28769988 100774944    0    0     0
 1  0   0 1445619804 26322348 103222584    0    0     0
 1  0   0 1445619804 23875592 105669340    0    0     0

cat /proc/meminfo | head
Active(anon) increase.
MemTotal:       1579941036 kB
MemFree:        1445618500 kB
MemAvailable:   1453013224 kB
Buffers:            6516 kB
Cached:         128653956 kB
SwapCached:            0 kB
Active:         118110812 kB
Inactive:       11436620 kB
Active(anon):   115345744 kB
Inactive(anon):   945292 kB

When the Active(anon) is 115345744 kB, insmod module triggers
the ZONE_DMA32 watermark.

perf record -e vmscan:mm_vmscan_lru_isolate -aR
perf script
isolate_mode=0 classzone=1 order=1 nr_requested=32 nr_scanned=2
nr_skipped=2 nr_taken=0 lru=active_anon
isolate_mode=0 classzone=1 order=1 nr_requested=32 nr_scanned=0
nr_skipped=0 nr_taken=0 lru=active_anon
isolate_mode=0 classzone=1 order=0 nr_requested=32 nr_scanned=28835844
nr_skipped=28835844 nr_taken=0 lru=active_anon
isolate_mode=0 classzone=1 order=1 nr_requested=32 nr_scanned=28835844
nr_skipped=28835844 nr_taken=0 lru=active_anon
isolate_mode=0 classzone=1 order=0 nr_requested=32 nr_scanned=29
nr_skipped=29 nr_taken=0 lru=active_anon
isolate_mode=0 classzone=1 order=0 nr_requested=32 nr_scanned=0
nr_skipped=0 nr_taken=0 lru=active_anon

See nr_scanned=28835844.
28835844 * 4k = 115343376KB approximately equal to 115345744 kB.

If Active(anon) is increased to 1000G and insmod then triggers
the ZONE_DMA32 watermark, a hard lockup will occur.

On my device, nr_scanned = 0x0000000003e3e937 at the time of the hard lockup.
Converted to memory size: 0x0000000003e3e937 * 4KB = 261072092 KB.

   [ffffc90006fb7c28] isolate_lru_folios at ffffffffa597df53
    ffffc90006fb7c30: 0000000000000020 0000000000000000
    ffffc90006fb7c40: ffffc90006fb7d40 ffff88812cbd3000
    ffffc90006fb7c50: ffffc90006fb7d30 0000000106fb7de8
    ffffc90006fb7c60: ffffea04a2197008 ffffea0006ed4a48
    ffffc90006fb7c70: 0000000000000000 0000000000000000
    ffffc90006fb7c80: 0000000000000000 0000000000000000
    ffffc90006fb7c90: 0000000000000000 0000000000000000
    ffffc90006fb7ca0: 0000000000000000 0000000003e3e937
    ffffc90006fb7cb0: 0000000000000000 0000000000000000
    ffffc90006fb7cc0: 8d7c0b56b7874b00 ffff88812cbd3000

About the Fixes:
Why did it take eight years to be discovered?

The problem requires the following conditions to occur:
1. The device memory should be large enough.
2. Pages in the LRU(active_anon) list are mostly from the ZONE_NORMAL area.
3. The memory in ZONE_DMA32 needs to reach the watermark.

If the memory is not large enough, or if the usage design of ZONE_DMA32
area memory is reasonable, this problem is difficult to detect.

notes:
The problem is most likely to occur in ZONE_DMA32 and ZONE_NORMAL,
but other suitable scenarios may also trigger the problem.

Link: https://lkml.kernel.org/r/20241119060842.274072-1-liuye@kylinos.cn
Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis")
Signed-off-by: liuye <liuye@kylinos.cn>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Yang Shi <yang@os.amperecomputing.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 1 +
 mm/vmscan.c          | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a5f475335aea8..b13b72645db33 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -222,6 +222,7 @@ enum {
 };
 
 #define SWAP_CLUSTER_MAX 32UL
+#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
 /* Bit flag in swap_map */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 683ec56d4f608..5b2f12425b28c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1692,6 +1692,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
 	unsigned long skipped = 0;
 	unsigned long scan, total_scan, nr_pages;
+	unsigned long max_nr_skipped = 0;
 	LIST_HEAD(folios_skipped);
 
 	total_scan = 0;
@@ -1706,9 +1707,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx) {
+		/* Using max_nr_skipped to prevent hard LOCKUP*/
+		if (max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED &&
+		    (folio_zonenum(folio) > sc->reclaim_idx)) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
+			max_nr_skipped++;
 			goto move;
 		}
 

From 6e8e04291d81867680552b49f40fa5c746b641d8 Mon Sep 17 00:00:00 2001
From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Date: Tue, 28 Jan 2025 08:16:31 +0900
Subject: [PATCH 311/368] mm/zsmalloc: add __maybe_unused attribute for
 is_first_zpdesc()

Commit c1b3bb73d55e ("mm/zsmalloc: use zpdesc in
trylock_zspage()/lock_zspage()") introduces is_first_zpdesc() function.
However, the function is only used when CONFIG_DEBUG_VM=y.

When building with LLVM=1 and W=1 option, the following warning is
generated:
  $ make -j12 W=1 LLVM=1 mm/zsmalloc.o
  mm/zsmalloc.c:455:20: error: function 'is_first_zpdesc' is not needed and will not be emitted [-Werror,-Wunneeded-internal-declaration]
    455 | static inline bool is_first_zpdesc(struct zpdesc *zpdesc)
        |                    ^~~~~~~~~~~~~~~
  1 error generated.

Fix the warning by adding __maybe_unused attribute to the function.
No functional change intended.

Link: https://lkml.kernel.org/r/20250127231631.4363-1-42.hyeyoo@gmail.com
Fixes: c1b3bb73d55e ("mm/zsmalloc: use zpdesc in trylock_zspage()/lock_zspage()")
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501240958.4ILzuBrH-lkp@intel.com/
Cc: Alex Shi <alexs@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zsmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f89..6d0e47f7ae339 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -452,7 +452,7 @@ static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
 	.lock	= INIT_LOCAL_LOCK(lock),
 };
 
-static inline bool is_first_zpdesc(struct zpdesc *zpdesc)
+static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc)
 {
 	return PagePrivate(zpdesc_page(zpdesc));
 }

From f921da2c34692dfec5f72b5ae347b1bea22bb369 Mon Sep 17 00:00:00 2001
From: Heming Zhao <heming.zhao@suse.com>
Date: Tue, 21 Jan 2025 19:22:03 +0800
Subject: [PATCH 312/368] ocfs2: fix incorrect CPU endianness conversion
 causing mount failure

Commit 23aab037106d ("ocfs2: fix UBSAN warning in ocfs2_verify_volume()")
introduced a regression bug.  The blksz_bits value is already converted to
CPU endian in the previous code; therefore, the code shouldn't use
le32_to_cpu() anymore.

Link: https://lkml.kernel.org/r/20250121112204.12834-1-heming.zhao@suse.com
Fixes: 23aab037106d ("ocfs2: fix UBSAN warning in ocfs2_verify_volume()")
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index e0b91dbaa0acb..8bb5022f30824 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2285,7 +2285,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
 			mlog(ML_ERROR, "found superblock with incorrect block "
 			     "size bits: found %u, should be 9, 10, 11, or 12\n",
 			     blksz_bits);
-		} else if ((1 << le32_to_cpu(blksz_bits)) != blksz) {
+		} else if ((1 << blksz_bits) != blksz) {
 			mlog(ML_ERROR, "found superblock with incorrect block "
 			     "size: found %u, should be %u\n", 1 << blksz_bits, blksz);
 		} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=

From a479b078fddb0ad7f9e3c6da22d9cf8f2b5c7799 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Fri, 10 Jan 2025 20:21:32 +0800
Subject: [PATCH 313/368] mm/vmscan: accumulate nr_demoted for accurate
 demotion statistics

In shrink_folio_list(), demote_folio_list() can be called 2 times.
Currently stat->nr_demoted will only store the last nr_demoted (the later
nr_demoted is always zero, so the former nr_demoted gets lost); as a
result, the number of demoted pages is not accurate.

Accumulate the nr_demoted count across multiple calls to
demote_folio_list(), ensuring accurate reporting of demotion statistics.

[lizhijian@fujitsu.com: introduce local nr_demoted to fix nr_reclaimed double counting]
  Link: https://lkml.kernel.org/r/20250111015253.425693-1-lizhijian@fujitsu.com
Link: https://lkml.kernel.org/r/20250110122133.423481-1-lizhijian@fujitsu.com
Fixes: f77f0c751478 ("mm,memcg: provide per-cgroup counters for NUMA balancing operations")
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Acked-by: Kaiyang Zhao <kaiyang2@cs.cmu.edu>
Tested-by: Donet Tom <donettom@linux.ibm.com>
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5b2f12425b28c..c767d71c43d7d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1086,7 +1086,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 	struct folio_batch free_folios;
 	LIST_HEAD(ret_folios);
 	LIST_HEAD(demote_folios);
-	unsigned int nr_reclaimed = 0;
+	unsigned int nr_reclaimed = 0, nr_demoted = 0;
 	unsigned int pgactivate = 0;
 	bool do_demote_pass;
 	struct swap_iocb *plug = NULL;
@@ -1550,8 +1550,9 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 	/* 'folio_list' is always empty here */
 
 	/* Migrate folios selected for demotion */
-	stat->nr_demoted = demote_folio_list(&demote_folios, pgdat);
-	nr_reclaimed += stat->nr_demoted;
+	nr_demoted = demote_folio_list(&demote_folios, pgdat);
+	nr_reclaimed += nr_demoted;
+	stat->nr_demoted += nr_demoted;
 	/* Folios that could not be demoted are still in @demote_folios */
 	if (!list_empty(&demote_folios)) {
 		/* Folios which weren't demoted go back on @folio_list */

From 4ebc417ef9cb34010a71270421fe320ec5d88aa2 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 10 Jan 2025 11:36:33 +0100
Subject: [PATCH 314/368] scripts/gdb: fix aarch64 userspace detection in
 get_current_task

At least recent gdb releases (seen with 14.2) return SP_EL0 as signed long
which lets the right-shift always return 0.

Link: https://lkml.kernel.org/r/dcd2fabc-9131-4b48-8419-6444e2d67454@siemens.com
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Kieran Bingham <kbingham@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 scripts/gdb/linux/cpus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index 2f11c4f9c345a..13eb8b3901b8f 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -167,7 +167,7 @@ def get_current_task(cpu):
             var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task")
             return per_cpu(var_ptr, cpu).dereference()
     elif utils.is_target_arch("aarch64"):
-        current_task_addr = gdb.parse_and_eval("$SP_EL0")
+        current_task_addr = gdb.parse_and_eval("(unsigned long)$SP_EL0")
         if (current_task_addr >> 63) != 0:
             current_task = current_task_addr.cast(task_ptr_type)
             return current_task.dereference()

From bc404701349fbd3d94e389a06ccfc0948129ab24 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosry.ahmed@linux.dev>
Date: Thu, 23 Jan 2025 23:13:44 +0000
Subject: [PATCH 315/368] MAINTAINERS: mailmap: update Yosry Ahmed's email
 address

Moving to a linux.dev email address.

Link: https://lkml.kernel.org/r/20250123231344.817358-1-yosry.ahmed@linux.dev
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap    | 1 +
 MAINTAINERS | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 17dd8eb2630e6..b2a0050b1397a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -761,6 +761,7 @@ Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
 Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
 Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
 Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
+Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
 Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index bc8ce7af3303f..d269d3c6e3171 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -26213,7 +26213,7 @@ K:	zstd
 
 ZSWAP COMPRESSED SWAP CACHING
 M:	Johannes Weiner <hannes@cmpxchg.org>
-M:	Yosry Ahmed <yosryahmed@google.com>
+M:	Yosry Ahmed <yosry.ahmed@linux.dev>
 M:	Nhat Pham <nphamcs@gmail.com>
 R:	Chengming Zhou <chengming.zhou@linux.dev>
 L:	linux-mm@kvack.org

From c3d8ced37e6f724cdcc5382b40858a2136c47591 Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Date: Mon, 20 Jan 2025 15:56:59 -0500
Subject: [PATCH 316/368] mailmap: add an entry for Hamza Mahfooz

Map my previous work email to my current one.

Link: https://lkml.kernel.org/r/20250120205659.139027-1-hamzamahfooz@linux.microsoft.com
Signed-off-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Hans Verkuil <hverkuil@xs4all.nl>
Cc: Matthieu Baerts <matttbe@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index b2a0050b1397a..8d721d390e9df 100644
--- a/.mailmap
+++ b/.mailmap
@@ -261,6 +261,7 @@ Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
 Guru Das Srinagesh <quic_gurus@quicinc.com> <gurus@codeaurora.org>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
 Gustavo Padovan <padovan@profusion.mobi>
+Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com>
 Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
 Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com>
 Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl>

From 488b5b9eca68497b533ced059be5eff19578bbca Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 27 Jan 2025 18:42:33 +0000
Subject: [PATCH 317/368] mm: kmemleak: fix upper boundary check for physical
 address objects

Memblock allocations are registered by kmemleak separately, based on their
physical address.  During the scanning stage, it checks whether an object
is within the min_low_pfn and max_low_pfn boundaries and ignores it
otherwise.

With the recent addition of __percpu pointer leak detection (commit
6c99d4eb7c5e ("kmemleak: enable tracking for percpu pointers")), kmemleak
started reporting leaks in setup_zone_pageset() and
setup_per_cpu_pageset().  These were caused by the node_data[0] object
(initialised in alloc_node_data()) ending on the PFN_PHYS(max_low_pfn)
boundary.  The non-strict upper boundary check introduced by commit
84c326299191 ("mm: kmemleak: check physical address when scan") causes the
pg_data_t object to be ignored (not scanned) and the __percpu pointers it
contains to be reported as leaks.

Make the max_low_pfn upper boundary check strict when deciding whether to
ignore a physical address object and not scan it.

Link: https://lkml.kernel.org/r/20250127184233.2974311-1-catalin.marinas@arm.com
Fixes: 84c326299191 ("mm: kmemleak: check physical address when scan")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Cc: Patrick Wang <patrick.wang.shcn@gmail.com>
Cc: <stable@vger.kernel.org>	[6.0.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kmemleak.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 982bb5ef32331..c6ed68604136a 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1689,7 +1689,7 @@ static void kmemleak_scan(void)
 			unsigned long phys = object->pointer;
 
 			if (PHYS_PFN(phys) < min_low_pfn ||
-			    PHYS_PFN(phys + object->size) >= max_low_pfn)
+			    PHYS_PFN(phys + object->size) > max_low_pfn)
 				__paint_it(object, KMEMLEAK_BLACK);
 		}
 

From 4c80187001d3e2876dfe7e011b9eac3b6270156f Mon Sep 17 00:00:00 2001
From: Bruno Faccini <bfaccini@nvidia.com>
Date: Mon, 27 Jan 2025 09:16:23 -0800
Subject: [PATCH 318/368] mm/fake-numa: handle cases with no SRAT info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Handle more gracefully cases where no SRAT information is available, like
in VMs with no NUMA support, and allow fake-numa configuration to complete
successfully in these cases.

Link: https://lkml.kernel.org/r/20250127171623.1523171-1-bfaccini@nvidia.com
Fixes: 63db8170bf34 ("mm/fake-numa: allow later numa node hotplug")
Signed-off-by: Bruno Faccini <bfaccini@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hyeonggon Yoo <hyeonggon.yoo@sk.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Len Brown <lenb@kernel.org>
Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/acpi/numa/srat.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index 59fffe34c9d04..00ac0d7bb8c9f 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -95,9 +95,13 @@ int __init fix_pxm_node_maps(int max_nid)
 	int i, j, index = -1, count = 0;
 	nodemask_t nodes_to_enable;
 
-	if (numa_off || srat_disabled())
+	if (numa_off)
 		return -1;
 
+	/* no or incomplete node/PXM mapping set, nothing to do */
+	if (srat_disabled())
+		return 0;
+
 	/* find fake nodes PXM mapping */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		if (node_to_pxm_map[i] != PXM_INVAL) {
@@ -117,6 +121,11 @@ int __init fix_pxm_node_maps(int max_nid)
 			}
 		}
 	}
+	if (index == -1) {
+		pr_debug("No node/PXM mapping has been set\n");
+		/* nothing more to be done */
+		return 0;
+	}
 	if (WARN(index != max_nid, "%d max nid  when expected %d\n",
 		      index, max_nid))
 		return -1;

From 64c37e134b120fb462fb4a80694bfb8e7be77b14 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
Date: Mon, 27 Jan 2025 12:02:21 -0500
Subject: [PATCH 319/368] kernel: be more careful about dup_mmap() failures and
 uprobe registering

If a memory allocation fails during dup_mmap(), the maple tree can be left
in an unsafe state for other iterators besides the exit path.  All the
locks are dropped before the exit_mmap() call (in mm/mmap.c), but the
incomplete mm_struct can be reached through (at least) the rmap finding
the vmas which have a pointer back to the mm_struct.

Up to this point, there have been no issues with being able to find an
mm_struct that was only partially initialised.  Syzbot was able to make
the incomplete mm_struct fail with recent forking changes, so it has been
proven unsafe to use the mm_struct that hasn't been initialised, as
referenced in the link below.

Although 8ac662f5da19f ("fork: avoid inappropriate uprobe access to
invalid mm") fixed the uprobe access, it does not completely remove the
race.

This patch sets the MMF_OOM_SKIP to avoid the iteration of the vmas on the
oom side (even though this is extremely unlikely to be selected as an oom
victim in the race window), and sets MMF_UNSTABLE to avoid other potential
users from using a partially initialised mm_struct.

When registering vmas for uprobe, skip the vmas in an mm that is marked
unstable.  Modifying a vma in an unstable mm may cause issues if the mm
isn't fully initialised.

Link: https://lore.kernel.org/all/6756d273.050a0220.2477f.003d.GAE@google.com/
Link: https://lkml.kernel.org/r/20250127170221.1761366-1-Liam.Howlett@oracle.com
Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/events/uprobes.c |  4 ++++
 kernel/fork.c           | 17 ++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e421a5f2ec7d0..2ca797cbe465f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -28,6 +28,7 @@
 #include <linux/rcupdate_trace.h>
 #include <linux/workqueue.h>
 #include <linux/srcu.h>
+#include <linux/oom.h>          /* check_stable_address_space */
 
 #include <linux/uprobes.h>
 
@@ -1260,6 +1261,9 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 		 * returns NULL in find_active_uprobe_rcu().
 		 */
 		mmap_write_lock(mm);
+		if (check_stable_address_space(mm))
+			goto unlock;
+
 		vma = find_vma(mm, info->vaddr);
 		if (!vma || !valid_vma(vma, is_register) ||
 		    file_inode(vma->vm_file) != uprobe->inode)
diff --git a/kernel/fork.c b/kernel/fork.c
index cba5ede2c6398..735405a9c5f32 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -760,7 +760,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		mt_set_in_rcu(vmi.mas.tree);
 		ksm_fork(mm, oldmm);
 		khugepaged_fork(mm, oldmm);
-	} else if (mpnt) {
+	} else {
+
 		/*
 		 * The entire maple tree has already been duplicated. If the
 		 * mmap duplication fails, mark the failure point with
@@ -768,8 +769,18 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		 * stop releasing VMAs that have not been duplicated after this
 		 * point.
 		 */
-		mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
-		mas_store(&vmi.mas, XA_ZERO_ENTRY);
+		if (mpnt) {
+			mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
+			mas_store(&vmi.mas, XA_ZERO_ENTRY);
+			/* Avoid OOM iterating a broken tree */
+			set_bit(MMF_OOM_SKIP, &mm->flags);
+		}
+		/*
+		 * The mm_struct is going to exit, but the locks will be dropped
+		 * first.  Set the mm_struct as unstable is advisable as it is
+		 * not fully initialised.
+		 */
+		set_bit(MMF_UNSTABLE, &mm->flags);
 	}
 out:
 	mmap_write_unlock(mm);

From 6268f0a166ebcf5a31577036f4c1e613d5ab4fb1 Mon Sep 17 00:00:00 2001
From: yangge <yangge1116@126.com>
Date: Sat, 25 Jan 2025 14:53:57 +0800
Subject: [PATCH 320/368] mm: compaction: use the proper flag to determine
 watermarks

There are 4 NUMA nodes on my machine, and each NUMA node has 32GB of
memory.  I have configured 16GB of CMA memory on each NUMA node, and
starting a 32GB virtual machine with device passthrough is extremely slow,
taking almost an hour.

Long term GUP cannot allocate memory from CMA area, so a maximum of 16 GB
of no-CMA memory on a NUMA node can be used as virtual machine memory.
There is 16GB of free CMA memory on a NUMA node, which is sufficient to
pass the order-0 watermark check, causing the __compaction_suitable()
function to consistently return true.

For costly allocations, if the __compaction_suitable() function always
returns true, it causes the __alloc_pages_slowpath() function to fail to
exit at the appropriate point.  This prevents timely fallback to
allocating memory on other nodes, ultimately resulting in excessively long
virtual machine startup times.

Call trace:
__alloc_pages_slowpath
    if (compact_result == COMPACT_SKIPPED ||
        compact_result == COMPACT_DEFERRED)
        goto nopage; // should exit __alloc_pages_slowpath() from here

We could use the real unmovable allocation context to have
__zone_watermark_unusable_free() subtract CMA pages, and thus we won't
pass the order-0 check anymore once the non-CMA part is exhausted.  There
is some risk that in some different scenario the compaction could in fact
migrate pages from the exhausted non-CMA part of the zone to the CMA part
and succeed, and we'll skip it instead.  But only __GFP_NORETRY
allocations should be affected in the immediate "goto nopage" when
compaction is skipped, others will attempt with DEF_COMPACT_PRIORITY
anyway and won't fail without trying to compact-migrate the non-CMA
pageblocks into CMA pageblocks first, so it should be fine.

After this fix, it only takes a few tens of seconds to start a 32GB
virtual machine with device passthrough functionality.

Link: https://lore.kernel.org/lkml/1736335854-548-1-git-send-email-yangge1116@126.com/
Link: https://lkml.kernel.org/r/1737788037-8439-1-git-send-email-yangge1116@126.com
Signed-off-by: yangge <yangge1116@126.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Barry Song <21cnbao@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/compaction.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index bcc0df0066dc3..12ed8425fa175 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2491,7 +2491,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  */
 static enum compact_result
 compaction_suit_allocation_order(struct zone *zone, unsigned int order,
-				 int highest_zoneidx, unsigned int alloc_flags)
+				 int highest_zoneidx, unsigned int alloc_flags,
+				 bool async)
 {
 	unsigned long watermark;
 
@@ -2500,6 +2501,23 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order,
 			      alloc_flags))
 		return COMPACT_SUCCESS;
 
+	/*
+	 * For unmovable allocations (without ALLOC_CMA), check if there is enough
+	 * free memory in the non-CMA pageblocks. Otherwise compaction could form
+	 * the high-order page in CMA pageblocks, which would not help the
+	 * allocation to succeed. However, limit the check to costly order async
+	 * compaction (such as opportunistic THP attempts) because there is the
+	 * possibility that compaction would migrate pages from non-CMA to CMA
+	 * pageblock.
+	 */
+	if (order > PAGE_ALLOC_COSTLY_ORDER && async &&
+	    !(alloc_flags & ALLOC_CMA)) {
+		watermark = low_wmark_pages(zone) + compact_gap(order);
+		if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
+					   0, zone_page_state(zone, NR_FREE_PAGES)))
+			return COMPACT_SKIPPED;
+	}
+
 	if (!compaction_suitable(zone, order, highest_zoneidx))
 		return COMPACT_SKIPPED;
 
@@ -2535,7 +2553,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 	if (!is_via_compact_memory(cc->order)) {
 		ret = compaction_suit_allocation_order(cc->zone, cc->order,
 						       cc->highest_zoneidx,
-						       cc->alloc_flags);
+						       cc->alloc_flags,
+						       cc->mode == MIGRATE_ASYNC);
 		if (ret != COMPACT_CONTINUE)
 			return ret;
 	}
@@ -3038,7 +3057,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 
 		ret = compaction_suit_allocation_order(zone,
 				pgdat->kcompactd_max_order,
-				highest_zoneidx, ALLOC_WMARK_MIN);
+				highest_zoneidx, ALLOC_WMARK_MIN,
+				false);
 		if (ret == COMPACT_CONTINUE)
 			return true;
 	}
@@ -3079,7 +3099,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 			continue;
 
 		ret = compaction_suit_allocation_order(zone,
-				cc.order, zoneid, ALLOC_WMARK_MIN);
+				cc.order, zoneid, ALLOC_WMARK_MIN,
+				false);
 		if (ret != COMPACT_CONTINUE)
 			continue;
 

From 6438ef381c183444f7f9d1de18f22661cba1e946 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Sat, 25 Jan 2025 07:20:53 +0900
Subject: [PATCH 321/368] nilfs2: fix possible int overflows in nilfs_fiemap()

Since nilfs_bmap_lookup_contig() in nilfs_fiemap() calculates its result
by being prepared to go through potentially maxblocks == INT_MAX blocks,
the value in n may experience an overflow caused by left shift of blkbits.

While it is extremely unlikely to occur, play it safe and cast the
right-hand expression to a wider type to mitigate the issue.

Found by Linux Verification Center (linuxtesting.org) with static analysis
tool SVACE.

Link: https://lkml.kernel.org/r/20250124222133.5323-1-konishi.ryusuke@gmail.com
Fixes: 622daaff0a89 ("nilfs2: fiemap support")
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/nilfs2/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index e8015d24a82cd..6613b8fcceb0d 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -1186,7 +1186,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			if (size) {
 				if (phys && blkphy << blkbits == phys + size) {
 					/* The current extent goes on */
-					size += n << blkbits;
+					size += (u64)n << blkbits;
 				} else {
 					/* Terminate the current extent */
 					ret = fiemap_fill_next_extent(
@@ -1199,14 +1199,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 					flags = FIEMAP_EXTENT_MERGED;
 					logical = blkoff << blkbits;
 					phys = blkphy << blkbits;
-					size = n << blkbits;
+					size = (u64)n << blkbits;
 				}
 			} else {
 				/* Start a new extent */
 				flags = FIEMAP_EXTENT_MERGED;
 				logical = blkoff << blkbits;
 				phys = blkphy << blkbits;
-				size = n << blkbits;
+				size = (u64)n << blkbits;
 			}
 			blkoff += n;
 		}

From e64f81946adf68cd75e2207dd9a51668348a4af8 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 24 Jan 2025 13:01:38 +0100
Subject: [PATCH 322/368] kfence: skip __GFP_THISNODE allocations on NUMA
 systems

On NUMA systems, __GFP_THISNODE indicates that an allocation _must_ be on
a particular node, and failure to allocate on the desired node will result
in a failed allocation.

Skip __GFP_THISNODE allocations if we are running on a NUMA system, since
KFENCE can't guarantee which node its pool pages are allocated on.

Link: https://lkml.kernel.org/r/20250124120145.410066-1-elver@google.com
Fixes: 236e9f153852 ("kfence: skip all GFP_ZONEMASK allocations")
Signed-off-by: Marco Elver <elver@google.com>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kfence/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 67fc321db79b7..102048821c222 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -21,6 +21,7 @@
 #include <linux/log2.h>
 #include <linux/memblock.h>
 #include <linux/moduleparam.h>
+#include <linux/nodemask.h>
 #include <linux/notifier.h>
 #include <linux/panic_notifier.h>
 #include <linux/random.h>
@@ -1084,6 +1085,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 	 * properties (e.g. reside in DMAable memory).
 	 */
 	if ((flags & GFP_ZONEMASK) ||
+	    ((flags & __GFP_THISNODE) && num_online_nodes() > 1) ||
 	    (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
 		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
 		return NULL;

From 1ccae30ecd98671325fa6954f9934bad298b56a2 Mon Sep 17 00:00:00 2001
From: Christopher Obbard <christopher.obbard@linaro.org>
Date: Wed, 22 Jan 2025 12:04:27 +0000
Subject: [PATCH 323/368] .mailmap: update email address for Christopher Obbard

Update my email address.

Link: https://lkml.kernel.org/r/20250122-wip-obbardc-update-email-v2-1-12bde6b79ad0@linaro.org
Signed-off-by: Christopher Obbard <christopher.obbard@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 8d721d390e9df..fec6b455b576a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -165,6 +165,7 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
 Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
 Christian Marangi <ansuelsmth@gmail.com>
 Christophe Ricard <christophe.ricard@gmail.com>
+Christopher Obbard <christopher.obbard@linaro.org> <chris.obbard@collabora.com>
 Christoph Hellwig <hch@lst.de>
 Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
 Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>

From 498c48c66eb600535f1221652509eefb2dce7770 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Thu, 30 Jan 2025 19:51:31 +0800
Subject: [PATCH 324/368] mm, swap: fix reclaim offset calculation error during
 allocation

There is a code error that will cause the swap entry allocator to reclaim
and check the whole cluster with an unexpected tail offset instead of the
part that needs to be reclaimed.  This may cause corruption of the swap
map, so fix it.

Link: https://lkml.kernel.org/r/20250130115131.37777-1-ryncsn@gmail.com
Fixes: 3b644773eefd ("mm, swap: reduce contention on device lock")
Signed-off-by: Kairui Song <kasong@tencent.com>
Cc: Chris Li <chrisl@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/swapfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6e867c16ea934..ba19430dd4ead 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -794,7 +794,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
 		if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim))
 			continue;
 		if (need_reclaim) {
-			ret = cluster_reclaim_range(si, ci, start, end);
+			ret = cluster_reclaim_range(si, ci, offset, offset + nr_pages);
 			/*
 			 * Reclaim drops ci->lock and cluster could be used
 			 * by another order. Not checking flag as off-list

From 1aaf8c122918aa8897605a9aa1e8ed6600d6f930 Mon Sep 17 00:00:00 2001
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Date: Tue, 21 Jan 2025 10:01:59 +0800
Subject: [PATCH 325/368] mm: gup: fix infinite loop within
 __get_longterm_locked

We can run into an infinite loop in __get_longterm_locked() when
collect_longterm_unpinnable_folios() finds only folios that are isolated
from the LRU or were never added to the LRU.  This can happen when all
folios to be pinned are never added to the LRU, for example when
vm_ops->fault allocated pages using cma_alloc() and never added them to
the LRU.

Fix it by simply taking a look at the list in the single caller, to see if
anything was added.

[zhaoyang.huang@unisoc.com: move definition of local]
  Link: https://lkml.kernel.org/r/20250122012604.3654667-1-zhaoyang.huang@unisoc.com
Link: https://lkml.kernel.org/r/20250121020159.3636477-1-zhaoyang.huang@unisoc.com
Fixes: 67e139b02d99 ("mm/gup.c: refactor check_and_migrate_movable_pages()")
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Aijun Sun <aijun.sun@unisoc.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/gup.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 9aaf338cc1f48..3883b307780ea 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
 /*
  * Returns the number of collected folios. Return value is always >= 0.
  */
-static unsigned long collect_longterm_unpinnable_folios(
+static void collect_longterm_unpinnable_folios(
 		struct list_head *movable_folio_list,
 		struct pages_or_folios *pofs)
 {
-	unsigned long i, collected = 0;
 	struct folio *prev_folio = NULL;
 	bool drain_allow = true;
+	unsigned long i;
 
 	for (i = 0; i < pofs->nr_entries; i++) {
 		struct folio *folio = pofs_get_folio(pofs, i);
@@ -2338,8 +2338,6 @@ static unsigned long collect_longterm_unpinnable_folios(
 		if (folio_is_longterm_pinnable(folio))
 			continue;
 
-		collected++;
-
 		if (folio_is_device_coherent(folio))
 			continue;
 
@@ -2361,8 +2359,6 @@ static unsigned long collect_longterm_unpinnable_folios(
 				    NR_ISOLATED_ANON + folio_is_file_lru(folio),
 				    folio_nr_pages(folio));
 	}
-
-	return collected;
 }
 
 /*
@@ -2439,11 +2435,9 @@ static long
 check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
 {
 	LIST_HEAD(movable_folio_list);
-	unsigned long collected;
 
-	collected = collect_longterm_unpinnable_folios(&movable_folio_list,
-						       pofs);
-	if (!collected)
+	collect_longterm_unpinnable_folios(&movable_folio_list, pofs);
+	if (list_empty(&movable_folio_list))
 		return 0;
 
 	return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);

From 76e961157e078bc5d3cd2df08317e00b00a829eb Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Sat, 11 Jan 2025 16:36:55 +0530
Subject: [PATCH 326/368] mm/hugetlb: fix hugepage allocation for interleaved
 memory nodes

gather_bootmem_prealloc() assumes the start nid as 0 and size as
num_node_state(N_MEMORY).  That means that if memory-attached NUMA
nodes are interleaved, gather_bootmem_prealloc_parallel() will fail
to scan some of these nodes.

Since memory-attached NUMA nodes can be interleaved in any fashion,
ensure that the code checks all NUMA node ids (.size = nr_node_ids).
Still keep max_threads as num_node_state(N_MEMORY), so that all
nr_node_ids are distributed among that many threads.

e.g. qemu cmdline
========================
numa_cmd="-numa node,nodeid=1,memdev=mem1,cpus=2-3 -numa node,nodeid=0,cpus=0-1 -numa dist,src=0,dst=1,val=20"
mem_cmd="-object memory-backend-ram,id=mem1,size=16G"

w/o this patch for cmdline (default_hugepagesz=1GB hugepagesz=1GB hugepages=2):
==========================
~ # cat /proc/meminfo  |grep -i huge
AnonHugePages:         0 kB
ShmemHugePages:        0 kB
FileHugePages:         0 kB
HugePages_Total:       0
HugePages_Free:        0
HugePages_Rsvd:        0
HugePages_Surp:        0
Hugepagesize:    1048576 kB
Hugetlb:               0 kB

with this patch for cmdline (default_hugepagesz=1GB hugepagesz=1GB hugepages=2):
===========================
~ # cat /proc/meminfo |grep -i huge
AnonHugePages:         0 kB
ShmemHugePages:        0 kB
FileHugePages:         0 kB
HugePages_Total:       2
HugePages_Free:        2
HugePages_Rsvd:        0
HugePages_Surp:        0
Hugepagesize:    1048576 kB
Hugetlb:         2097152 kB

Link: https://lkml.kernel.org/r/f8d8dad3a5471d284f54185f65d575a6aaab692b.1736592534.git.ritesh.list@gmail.com
Fixes: b78b27d02930 ("hugetlb: parallelize 1G hugetlb initialization")
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reported-by: Pavithra Prakash <pavrampu@linux.ibm.com>
Suggested-by: Muchun Song <muchun.song@linux.dev>
Tested-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Reviewed-by: Luiz Capitulino <luizcap@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Gang Li <gang.li@linux.dev>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3b25b69aa94f1..65068671e460a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3309,7 +3309,7 @@ static void __init gather_bootmem_prealloc(void)
 		.thread_fn	= gather_bootmem_prealloc_parallel,
 		.fn_arg		= NULL,
 		.start		= 0,
-		.size		= num_node_state(N_MEMORY),
+		.size		= nr_node_ids,
 		.align		= 1,
 		.min_chunk	= 1,
 		.max_threads	= num_node_state(N_MEMORY),

From e5eaa1bbe2813ac34788e485283be75f9d07137b Mon Sep 17 00:00:00 2001
From: Carlos Bilbao <carlos.bilbao@kernel.org>
Date: Wed, 29 Jan 2025 19:22:44 -0600
Subject: [PATCH 327/368] mailmap, MAINTAINERS, docs: update Carlos's email
 address

Update .mailmap to reflect my new (and final) primary email address,
carlos.bilbao@kernel.org.  Also update contact information in files
Documentation/translations/sp_SP/index.rst and MAINTAINERS.

Link: https://lkml.kernel.org/r/20250130012248.1196208-1-carlos.bilbao@kernel.org
Signed-off-by: Carlos Bilbao <carlos.bilbao@kernel.org>
Cc: Carlos Bilbao <bilbao@vt.edu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap                                   | 4 +++-
 Documentation/translations/sp_SP/index.rst | 2 +-
 MAINTAINERS                                | 8 ++++----
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/.mailmap b/.mailmap
index fec6b455b576a..9a270c53675b1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -148,7 +148,9 @@ Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
 Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
 Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
 Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
-Carlos Bilbao <carlos.bilbao.osdev@gmail.com> <carlos.bilbao@amd.com>
+Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com>
+Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao.osdev@gmail.com>
+Carlos Bilbao <carlos.bilbao@kernel.org> <bilbao@vt.edu>
 Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
 Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
 Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
diff --git a/Documentation/translations/sp_SP/index.rst b/Documentation/translations/sp_SP/index.rst
index aae7018b0d1a2..2b50283e16089 100644
--- a/Documentation/translations/sp_SP/index.rst
+++ b/Documentation/translations/sp_SP/index.rst
@@ -7,7 +7,7 @@ Traducción al español
 
 	\kerneldocCJKoff
 
-:maintainer: Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+:maintainer: Carlos Bilbao <carlos.bilbao@kernel.org>
 
 .. _sp_disclaimer:
 
diff --git a/MAINTAINERS b/MAINTAINERS
index d269d3c6e3171..1824df1f61f07 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1090,7 +1090,7 @@ F:	drivers/video/fbdev/geode/
 
 AMD HSMP DRIVER
 M:	Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
-R:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+R:	Carlos Bilbao <carlos.bilbao@kernel.org>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	Documentation/arch/x86/amd_hsmp.rst
@@ -5856,7 +5856,7 @@ F:	drivers/usb/atm/cxacru.c
 
 CONFIDENTIAL COMPUTING THREAT MODEL FOR X86 VIRTUALIZATION (SNP/TDX)
 M:	Elena Reshetova <elena.reshetova@intel.com>
-M:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+M:	Carlos Bilbao <carlos.bilbao@kernel.org>
 S:	Maintained
 F:	Documentation/security/snp-tdx-threat-model.rst
 
@@ -11323,7 +11323,7 @@ S:	Orphan
 F:	drivers/video/fbdev/imsttfb.c
 
 INDEX OF FURTHER KERNEL DOCUMENTATION
-M:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+M:	Carlos Bilbao <carlos.bilbao@kernel.org>
 S:	Maintained
 F:	Documentation/process/kernel-docs.rst
 
@@ -22205,7 +22205,7 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/sp2*
 
 SPANISH DOCUMENTATION
-M:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+M:	Carlos Bilbao <carlos.bilbao@kernel.org>
 R:	Avadhut Naik <avadhut.naik@amd.com>
 S:	Maintained
 F:	Documentation/translations/sp_SP/

From 0ca2a41e0ccc573845428b686ff09e9322c82b16 Mon Sep 17 00:00:00 2001
From: Tamir Duberstein <tamird@gmail.com>
Date: Wed, 29 Jan 2025 16:13:49 -0500
Subject: [PATCH 328/368] MAINTAINERS: add lib/test_xarray.c

Ensure test-only changes are sent to the relevant maintainer.

Link: https://lkml.kernel.org/r/20250129-xarray-test-maintainer-v1-1-482e31f30f47@gmail.com
Signed-off-by: Tamir Duberstein <tamird@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1824df1f61f07..f52a004982c9f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -25734,6 +25734,7 @@ F:	Documentation/core-api/xarray.rst
 F:	include/linux/idr.h
 F:	include/linux/xarray.h
 F:	lib/idr.c
+F:	lib/test_xarray.c
 F:	lib/xarray.c
 F:	tools/testing/radix-tree
 

From 050339050f6f2b18d32a61a0f725f423804ad2a5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 30 Jan 2025 16:09:20 -0800
Subject: [PATCH 329/368] revert "xarray: port tests to kunit"

Revert c7bb5cf9fc4e ("xarray: port tests to kunit").  It broke the build
when compiling the xarray userspace test harness code.

Reported-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Closes: https://lkml.kernel.org/r/07cf896e-adf8-414f-a629-a808fc26014a@oracle.com
Cc: David Gow <davidgow@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Tamir Duberstein <tamird@gmail.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/m68k/configs/amiga_defconfig    |   1 +
 arch/m68k/configs/apollo_defconfig   |   1 +
 arch/m68k/configs/atari_defconfig    |   1 +
 arch/m68k/configs/bvme6000_defconfig |   1 +
 arch/m68k/configs/hp300_defconfig    |   1 +
 arch/m68k/configs/mac_defconfig      |   1 +
 arch/m68k/configs/multi_defconfig    |   1 +
 arch/m68k/configs/mvme147_defconfig  |   1 +
 arch/m68k/configs/mvme16x_defconfig  |   1 +
 arch/m68k/configs/q40_defconfig      |   1 +
 arch/m68k/configs/sun3_defconfig     |   1 +
 arch/m68k/configs/sun3x_defconfig    |   1 +
 arch/powerpc/configs/ppc64_defconfig |   1 +
 lib/Kconfig.debug                    |  18 +-
 lib/Makefile                         |   2 +-
 lib/test_xarray.c                    | 671 +++++++++++----------------
 16 files changed, 294 insertions(+), 410 deletions(-)

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 8acfa66e10954..dbf2ea561c855 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -626,6 +626,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 35e9a08723048..b0fd199cc0a4e 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -583,6 +583,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 32891ddd3cc59..bb5b2d3b6c103 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -603,6 +603,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index ca276f0db3dd1..8315a13bab73b 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e83f14fe1a4f8..350370657e5fe 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -585,6 +585,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 6b58be24da793..f942b47557026 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,6 +602,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 0e8d24f825656..b1eaad02efab9 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -689,6 +689,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 24a7608c13ac7..6309a4442bb3f 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index c415f75821f39..3feb0731f8142 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -576,6 +576,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 2c715a8ff551e..ea04b1b0da7d4 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -592,6 +592,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 15ff37fcccbfb..f52d9af92153d 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -572,6 +572,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 40a44bf9f48d1..f348447824da9 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -573,6 +573,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index e9c46b59ebbcb..465eb96c755e0 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -448,6 +448,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 775966cf6114e..1af972a92d06f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2456,22 +2456,8 @@ config TEST_BITMAP
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
-config XARRAY_KUNIT
-	tristate "KUnit test XArray code at runtime" if !KUNIT_ALL_TESTS
-	depends on KUNIT
-	default KUNIT_ALL_TESTS
-	help
-	  Enable this option to test the Xarray code at boot.
-
-	  KUnit tests run during boot and output the results to the debug log
-	  in TAP format (http://testanything.org/). Only useful for kernel devs
-	  running the KUnit test harness, and not intended for inclusion into a
-	  production build.
-
-	  For more information on KUnit and unit tests in general please refer
-	  to the KUnit documentation in Documentation/dev-tools/kunit/.
-
-	  If unsure, say N.
+config TEST_XARRAY
+	tristate "Test the XArray code at runtime"
 
 config TEST_MAPLE_TREE
 	tristate "Test the Maple Tree code at runtime or module load"
diff --git a/lib/Makefile b/lib/Makefile
index f1c6e9d76a7c0..d5cfc7afbbb82 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -94,6 +94,7 @@ GCOV_PROFILE_test_bitmap.o := n
 endif
 
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
@@ -372,7 +373,6 @@ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN)
 obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
 obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o
 obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o
-obj-$(CONFIG_XARRAY_KUNIT) += test_xarray.o
 obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index eab5971d0a481..6932a26f4927c 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -6,10 +6,11 @@
  * Author: Matthew Wilcox <willy@infradead.org>
  */
 
-#include <kunit/test.h>
-
-#include <linux/module.h>
 #include <linux/xarray.h>
+#include <linux/module.h>
+
+static unsigned int tests_run;
+static unsigned int tests_passed;
 
 static const unsigned int order_limit =
 		IS_ENABLED(CONFIG_XARRAY_MULTI) ? BITS_PER_LONG : 1;
@@ -19,12 +20,15 @@ static const unsigned int order_limit =
 void xa_dump(const struct xarray *xa) { }
 # endif
 #undef XA_BUG_ON
-#define XA_BUG_ON(xa, x) do {		\
-	if (x) {			\
-		KUNIT_FAIL(test, #x);	\
-		xa_dump(xa);		\
-		dump_stack();		\
-	}				\
+#define XA_BUG_ON(xa, x) do {					\
+	tests_run++;						\
+	if (x) {						\
+		printk("BUG at %s:%d\n", __func__, __LINE__);	\
+		xa_dump(xa);					\
+		dump_stack();					\
+	} else {						\
+		tests_passed++;					\
+	}							\
 } while (0)
 #endif
 
@@ -38,13 +42,13 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 	return xa_store(xa, index, xa_mk_index(index), gfp);
 }
 
-static void xa_insert_index(struct kunit *test, struct xarray *xa, unsigned long index)
+static void xa_insert_index(struct xarray *xa, unsigned long index)
 {
 	XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index),
 				GFP_KERNEL) != 0);
 }
 
-static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long index, gfp_t gfp)
+static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	u32 id;
 
@@ -53,7 +57,7 @@ static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long
 	XA_BUG_ON(xa, id != index);
 }
 
-static void xa_erase_index(struct kunit *test, struct xarray *xa, unsigned long index)
+static void xa_erase_index(struct xarray *xa, unsigned long index)
 {
 	XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_index(index));
 	XA_BUG_ON(xa, xa_load(xa, index) != NULL);
@@ -79,15 +83,8 @@ static void *xa_store_order(struct xarray *xa, unsigned long index,
 	return curr;
 }
 
-static inline struct xarray *xa_param(struct kunit *test)
+static noinline void check_xa_err(struct xarray *xa)
 {
-	return *(struct xarray **)test->param_value;
-}
-
-static noinline void check_xa_err(struct kunit *test)
-{
-	struct xarray *xa = xa_param(test);
-
 	XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0);
 	XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0);
 #ifndef __KERNEL__
@@ -102,10 +99,8 @@ static noinline void check_xa_err(struct kunit *test)
 //	XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
 }
 
-static noinline void check_xas_retry(struct kunit *test)
+static noinline void check_xas_retry(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 
@@ -114,7 +109,7 @@ static noinline void check_xas_retry(struct kunit *test)
 
 	rcu_read_lock();
 	XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0));
-	xa_erase_index(test, xa, 1);
+	xa_erase_index(xa, 1);
 	XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas)));
 	XA_BUG_ON(xa, xas_retry(&xas, NULL));
 	XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0)));
@@ -145,14 +140,12 @@ static noinline void check_xas_retry(struct kunit *test)
 	}
 	xas_unlock(&xas);
 
-	xa_erase_index(test, xa, 0);
-	xa_erase_index(test, xa, 1);
+	xa_erase_index(xa, 0);
+	xa_erase_index(xa, 1);
 }
 
-static noinline void check_xa_load(struct kunit *test)
+static noinline void check_xa_load(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i, j;
 
 	for (i = 0; i < 1024; i++) {
@@ -174,15 +167,13 @@ static noinline void check_xa_load(struct kunit *test)
 			else
 				XA_BUG_ON(xa, entry);
 		}
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_xa_mark_1(struct kunit *test, unsigned long index)
+static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order;
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1;
 
@@ -202,7 +193,7 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index)
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1));
 
 	/* Storing NULL clears marks, and they can't be set again */
-	xa_erase_index(test, xa, index);
+	xa_erase_index(xa, index);
 	XA_BUG_ON(xa, !xa_empty(xa));
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
 	xa_set_mark(xa, index, XA_MARK_0);
@@ -253,17 +244,15 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index)
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1));
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2));
-		xa_erase_index(test, xa, index);
-		xa_erase_index(test, xa, next);
+		xa_erase_index(xa, index);
+		xa_erase_index(xa, next);
 		XA_BUG_ON(xa, !xa_empty(xa));
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_xa_mark_2(struct kunit *test)
+static noinline void check_xa_mark_2(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	unsigned long index;
 	unsigned int count = 0;
@@ -300,11 +289,9 @@ static noinline void check_xa_mark_2(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_xa_mark_3(struct kunit *test)
+static noinline void check_xa_mark_3(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0x41);
 	void *entry;
 	int count = 0;
@@ -323,21 +310,19 @@ static noinline void check_xa_mark_3(struct kunit *test)
 #endif
 }
 
-static noinline void check_xa_mark(struct kunit *test)
+static noinline void check_xa_mark(struct xarray *xa)
 {
 	unsigned long index;
 
 	for (index = 0; index < 16384; index += 4)
-		check_xa_mark_1(test, index);
+		check_xa_mark_1(xa, index);
 
-	check_xa_mark_2(test);
-	check_xa_mark_3(test);
+	check_xa_mark_2(xa);
+	check_xa_mark_3(xa);
 }
 
-static noinline void check_xa_shrink(struct kunit *test)
+static noinline void check_xa_shrink(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 1);
 	struct xa_node *node;
 	unsigned int order;
@@ -362,7 +347,7 @@ static noinline void check_xa_shrink(struct kunit *test)
 	XA_BUG_ON(xa, xas_load(&xas) != NULL);
 	xas_unlock(&xas);
 	XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
-	xa_erase_index(test, xa, 0);
+	xa_erase_index(xa, 0);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	for (order = 0; order < max_order; order++) {
@@ -379,49 +364,45 @@ static noinline void check_xa_shrink(struct kunit *test)
 		XA_BUG_ON(xa, xa_head(xa) == node);
 		rcu_read_unlock();
 		XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
-		xa_erase_index(test, xa, ULONG_MAX);
+		xa_erase_index(xa, ULONG_MAX);
 		XA_BUG_ON(xa, xa->xa_head != node);
-		xa_erase_index(test, xa, 0);
+		xa_erase_index(xa, 0);
 	}
 }
 
-static noinline void check_insert(struct kunit *test)
+static noinline void check_insert(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i;
 
 	for (i = 0; i < 1024; i++) {
-		xa_insert_index(test, xa, i);
+		xa_insert_index(xa, i);
 		XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL);
 		XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL);
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	}
 
 	for (i = 10; i < BITS_PER_LONG; i++) {
-		xa_insert_index(test, xa, 1UL << i);
+		xa_insert_index(xa, 1UL << i);
 		XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL);
 		XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL);
-		xa_erase_index(test, xa, 1UL << i);
+		xa_erase_index(xa, 1UL << i);
 
-		xa_insert_index(test, xa, (1UL << i) - 1);
+		xa_insert_index(xa, (1UL << i) - 1);
 		XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL);
 		XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL);
-		xa_erase_index(test, xa, (1UL << i) - 1);
+		xa_erase_index(xa, (1UL << i) - 1);
 	}
 
-	xa_insert_index(test, xa, ~0UL);
+	xa_insert_index(xa, ~0UL);
 	XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL);
-	xa_erase_index(test, xa, ~0UL);
+	xa_erase_index(xa, ~0UL);
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_cmpxchg(struct kunit *test)
+static noinline void check_cmpxchg(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	void *FIVE = xa_mk_value(5);
 	void *SIX = xa_mk_value(6);
 	void *LOTS = xa_mk_value(12345678);
@@ -437,16 +418,14 @@ static noinline void check_cmpxchg(struct kunit *test)
 	XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE);
 	XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY);
-	xa_erase_index(test, xa, 12345678);
-	xa_erase_index(test, xa, 5);
+	xa_erase_index(xa, 12345678);
+	xa_erase_index(xa, 5);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_cmpxchg_order(struct kunit *test)
+static noinline void check_cmpxchg_order(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	void *FIVE = xa_mk_value(5);
 	unsigned int i, order = 3;
 
@@ -497,10 +476,8 @@ static noinline void check_cmpxchg_order(struct kunit *test)
 #endif
 }
 
-static noinline void check_reserve(struct kunit *test)
+static noinline void check_reserve(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	void *entry;
 	unsigned long index;
 	int count;
@@ -517,7 +494,7 @@ static noinline void check_reserve(struct kunit *test)
 	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
-	xa_erase_index(test, xa, 12345678);
+	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* cmpxchg sees a reserved entry as ZERO */
@@ -525,7 +502,7 @@ static noinline void check_reserve(struct kunit *test)
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY,
 				xa_mk_value(12345678), GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
-	xa_erase_index(test, xa, 12345678);
+	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* xa_insert treats it as busy */
@@ -565,10 +542,8 @@ static noinline void check_reserve(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_xas_erase(struct kunit *test)
+static noinline void check_xas_erase(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 	unsigned long i, j;
@@ -606,11 +581,9 @@ static noinline void check_xas_erase(struct kunit *test)
 }
 
 #ifdef CONFIG_XARRAY_MULTI
-static noinline void check_multi_store_1(struct kunit *test, unsigned long index,
+static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
 		unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 	unsigned long min = index & ~((1UL << order) - 1);
 	unsigned long max = min + (1UL << order);
@@ -629,15 +602,13 @@ static noinline void check_multi_store_1(struct kunit *test, unsigned long index
 	XA_BUG_ON(xa, xa_load(xa, max) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
 
-	xa_erase_index(test, xa, min);
+	xa_erase_index(xa, min);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_multi_store_2(struct kunit *test, unsigned long index,
+static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
 		unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 	xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL);
 
@@ -649,11 +620,9 @@ static noinline void check_multi_store_2(struct kunit *test, unsigned long index
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_multi_store_3(struct kunit *test, unsigned long index,
+static noinline void check_multi_store_3(struct xarray *xa, unsigned long index,
 		unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 	int n = 0;
@@ -678,11 +647,9 @@ static noinline void check_multi_store_3(struct kunit *test, unsigned long index
 }
 #endif
 
-static noinline void check_multi_store(struct kunit *test)
+static noinline void check_multi_store(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i, j, k;
 	unsigned int max_order = (sizeof(long) == 4) ? 30 : 60;
 
@@ -747,28 +714,26 @@ static noinline void check_multi_store(struct kunit *test)
 	}
 
 	for (i = 0; i < 20; i++) {
-		check_multi_store_1(test, 200, i);
-		check_multi_store_1(test, 0, i);
-		check_multi_store_1(test, (1UL << i) + 1, i);
+		check_multi_store_1(xa, 200, i);
+		check_multi_store_1(xa, 0, i);
+		check_multi_store_1(xa, (1UL << i) + 1, i);
 	}
-	check_multi_store_2(test, 4095, 9);
+	check_multi_store_2(xa, 4095, 9);
 
 	for (i = 1; i < 20; i++) {
-		check_multi_store_3(test, 0, i);
-		check_multi_store_3(test, 1UL << i, i);
+		check_multi_store_3(xa, 0, i);
+		check_multi_store_3(xa, 1UL << i, i);
 	}
 #endif
 }
 
 #ifdef CONFIG_XARRAY_MULTI
 /* mimics page cache __filemap_add_folio() */
-static noinline void check_xa_multi_store_adv_add(struct kunit *test,
+static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
 						  unsigned long index,
 						  unsigned int order,
 						  void *p)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 	unsigned int nrpages = 1UL << order;
 
@@ -796,12 +761,10 @@ static noinline void check_xa_multi_store_adv_add(struct kunit *test,
 }
 
 /* mimics page_cache_delete() */
-static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test,
+static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa,
 							unsigned long index,
 							unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 
 	xas_set_order(&xas, index, order);
@@ -809,14 +772,12 @@ static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test,
 	xas_init_marks(&xas);
 }
 
-static noinline void check_xa_multi_store_adv_delete(struct kunit *test,
+static noinline void check_xa_multi_store_adv_delete(struct xarray *xa,
 						     unsigned long index,
 						     unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	xa_lock_irq(xa);
-	check_xa_multi_store_adv_del_entry(test, index, order);
+	check_xa_multi_store_adv_del_entry(xa, index, order);
 	xa_unlock_irq(xa);
 }
 
@@ -853,12 +814,10 @@ static unsigned long some_val = 0xdeadbeef;
 static unsigned long some_val_2 = 0xdeaddead;
 
 /* mimics the page cache usage */
-static noinline void check_xa_multi_store_adv(struct kunit *test,
+static noinline void check_xa_multi_store_adv(struct xarray *xa,
 					      unsigned long pos,
 					      unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int nrpages = 1UL << order;
 	unsigned long index, base, next_index, next_next_index;
 	unsigned int i;
@@ -868,7 +827,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	next_index = round_down(base + nrpages, nrpages);
 	next_next_index = round_down(next_index + nrpages, nrpages);
 
-	check_xa_multi_store_adv_add(test, base, order, &some_val);
+	check_xa_multi_store_adv_add(xa, base, order, &some_val);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val);
@@ -876,20 +835,20 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL);
 
 	/* Use order 0 for the next item */
-	check_xa_multi_store_adv_add(test, next_index, 0, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2);
 	XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2);
 
 	/* Remove the next item */
-	check_xa_multi_store_adv_delete(test, next_index, 0);
+	check_xa_multi_store_adv_delete(xa, next_index, 0);
 
 	/* Now use order for a new pointer */
-	check_xa_multi_store_adv_add(test, next_index, order, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2);
 
-	check_xa_multi_store_adv_delete(test, next_index, order);
-	check_xa_multi_store_adv_delete(test, base, order);
+	check_xa_multi_store_adv_delete(xa, next_index, order);
+	check_xa_multi_store_adv_delete(xa, base, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* starting fresh again */
@@ -897,7 +856,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	/* let's test some holes now */
 
 	/* hole at base and next_next */
-	check_xa_multi_store_adv_add(test, next_index, order, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL);
@@ -908,12 +867,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL);
 
-	check_xa_multi_store_adv_delete(test, next_index, order);
+	check_xa_multi_store_adv_delete(xa, next_index, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* hole at base and next */
 
-	check_xa_multi_store_adv_add(test, next_next_index, order, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL);
@@ -924,12 +883,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2);
 
-	check_xa_multi_store_adv_delete(test, next_next_index, order);
+	check_xa_multi_store_adv_delete(xa, next_next_index, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 #endif
 
-static noinline void check_multi_store_advanced(struct kunit *test)
+static noinline void check_multi_store_advanced(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
@@ -941,59 +900,59 @@ static noinline void check_multi_store_advanced(struct kunit *test)
 	 */
 	for (pos = 7; pos < end; pos = (pos * pos) + 564) {
 		for (i = 0; i < max_order; i++) {
-			check_xa_multi_store_adv(test, pos, i);
-			check_xa_multi_store_adv(test, pos + 157, i);
+			check_xa_multi_store_adv(xa, pos, i);
+			check_xa_multi_store_adv(xa, pos + 157, i);
 		}
 	}
 #endif
 }
 
-static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, unsigned int base)
+static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base)
 {
 	int i;
 	u32 id;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 	/* An empty array should assign %base to the first alloc */
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Erasing it should make the array empty again */
-	xa_erase_index(test, xa, base);
+	xa_erase_index(xa, base);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* And it should assign %base again */
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Allocating and then erasing a lot should not lose base */
 	for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++)
-		xa_alloc_index(test, xa, i, GFP_KERNEL);
+		xa_alloc_index(xa, i, GFP_KERNEL);
 	for (i = base; i < 2 * XA_CHUNK_SIZE; i++)
-		xa_erase_index(test, xa, i);
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+		xa_erase_index(xa, i);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Destroying the array should do the same as erasing */
 	xa_destroy(xa);
 
 	/* And it should assign %base again */
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* The next assigned ID should be base+1 */
-	xa_alloc_index(test, xa, base + 1, GFP_KERNEL);
-	xa_erase_index(test, xa, base + 1);
+	xa_alloc_index(xa, base + 1, GFP_KERNEL);
+	xa_erase_index(xa, base + 1);
 
 	/* Storing a value should mark it used */
 	xa_store_index(xa, base + 1, GFP_KERNEL);
-	xa_alloc_index(test, xa, base + 2, GFP_KERNEL);
+	xa_alloc_index(xa, base + 2, GFP_KERNEL);
 
 	/* If we then erase base, it should be free */
-	xa_erase_index(test, xa, base);
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_erase_index(xa, base);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
-	xa_erase_index(test, xa, base + 1);
-	xa_erase_index(test, xa, base + 2);
+	xa_erase_index(xa, base + 1);
+	xa_erase_index(xa, base + 2);
 
 	for (i = 1; i < 5000; i++) {
-		xa_alloc_index(test, xa, base + i, GFP_KERNEL);
+		xa_alloc_index(xa, base + i, GFP_KERNEL);
 	}
 
 	xa_destroy(xa);
@@ -1016,14 +975,14 @@ static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, uns
 
 	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5),
 				GFP_KERNEL) != -EBUSY);
-	XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5),
 				GFP_KERNEL) != -EBUSY);
-	xa_erase_index(test, xa, 3);
+	xa_erase_index(xa, 3);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, unsigned int base)
+static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base)
 {
 	unsigned int i, id;
 	unsigned long index;
@@ -1059,7 +1018,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns
 	XA_BUG_ON(xa, id != 5);
 
 	xa_for_each(xa, index, entry) {
-		xa_erase_index(test, xa, index);
+		xa_erase_index(xa, index);
 	}
 
 	for (i = base; i < base + 9; i++) {
@@ -1074,7 +1033,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns
 	xa_destroy(xa);
 }
 
-static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, unsigned int base)
+static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base)
 {
 	struct xa_limit limit = XA_LIMIT(1, 0x3fff);
 	u32 next = 0;
@@ -1090,8 +1049,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit,
 				&next, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, id != 0x3ffd);
-	xa_erase_index(test, xa, 0x3ffd);
-	xa_erase_index(test, xa, 1);
+	xa_erase_index(xa, 0x3ffd);
+	xa_erase_index(xa, 1);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	for (i = 0x3ffe; i < 0x4003; i++) {
@@ -1106,8 +1065,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 
 	/* Check wrap-around is handled correctly */
 	if (base != 0)
-		xa_erase_index(test, xa, base);
-	xa_erase_index(test, xa, base + 1);
+		xa_erase_index(xa, base);
+	xa_erase_index(xa, base + 1);
 	next = UINT_MAX;
 	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX),
 				xa_limit_32b, &next, GFP_KERNEL) != 0);
@@ -1120,7 +1079,7 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 	XA_BUG_ON(xa, id != base + 1);
 
 	xa_for_each(xa, index, entry)
-		xa_erase_index(test, xa, index);
+		xa_erase_index(xa, index);
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
@@ -1128,21 +1087,19 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 static DEFINE_XARRAY_ALLOC(xa0);
 static DEFINE_XARRAY_ALLOC1(xa1);
 
-static noinline void check_xa_alloc(struct kunit *test)
+static noinline void check_xa_alloc(void)
 {
-	check_xa_alloc_1(test, &xa0, 0);
-	check_xa_alloc_1(test, &xa1, 1);
-	check_xa_alloc_2(test, &xa0, 0);
-	check_xa_alloc_2(test, &xa1, 1);
-	check_xa_alloc_3(test, &xa0, 0);
-	check_xa_alloc_3(test, &xa1, 1);
+	check_xa_alloc_1(&xa0, 0);
+	check_xa_alloc_1(&xa1, 1);
+	check_xa_alloc_2(&xa0, 0);
+	check_xa_alloc_2(&xa1, 1);
+	check_xa_alloc_3(&xa0, 0);
+	check_xa_alloc_3(&xa1, 1);
 }
 
-static noinline void __check_store_iter(struct kunit *test, unsigned long start,
+static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
 			unsigned int order, unsigned int present)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, start, order);
 	void *entry;
 	unsigned int count = 0;
@@ -1166,54 +1123,50 @@ static noinline void __check_store_iter(struct kunit *test, unsigned long start,
 	XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_index(start));
 	XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
 			xa_mk_index(start));
-	xa_erase_index(test, xa, start);
+	xa_erase_index(xa, start);
 }
 
-static noinline void check_store_iter(struct kunit *test)
+static noinline void check_store_iter(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int i, j;
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
 
 	for (i = 0; i < max_order; i++) {
 		unsigned int min = 1 << i;
 		unsigned int max = (2 << i) - 1;
-		__check_store_iter(test, 0, i, 0);
+		__check_store_iter(xa, 0, i, 0);
 		XA_BUG_ON(xa, !xa_empty(xa));
-		__check_store_iter(test, min, i, 0);
+		__check_store_iter(xa, min, i, 0);
 		XA_BUG_ON(xa, !xa_empty(xa));
 
 		xa_store_index(xa, min, GFP_KERNEL);
-		__check_store_iter(test, min, i, 1);
+		__check_store_iter(xa, min, i, 1);
 		XA_BUG_ON(xa, !xa_empty(xa));
 		xa_store_index(xa, max, GFP_KERNEL);
-		__check_store_iter(test, min, i, 1);
+		__check_store_iter(xa, min, i, 1);
 		XA_BUG_ON(xa, !xa_empty(xa));
 
 		for (j = 0; j < min; j++)
 			xa_store_index(xa, j, GFP_KERNEL);
-		__check_store_iter(test, 0, i, min);
+		__check_store_iter(xa, 0, i, min);
 		XA_BUG_ON(xa, !xa_empty(xa));
 		for (j = 0; j < min; j++)
 			xa_store_index(xa, min + j, GFP_KERNEL);
-		__check_store_iter(test, min, i, min);
+		__check_store_iter(xa, min, i, min);
 		XA_BUG_ON(xa, !xa_empty(xa));
 	}
 #ifdef CONFIG_XARRAY_MULTI
 	xa_store_index(xa, 63, GFP_KERNEL);
 	xa_store_index(xa, 65, GFP_KERNEL);
-	__check_store_iter(test, 64, 2, 1);
-	xa_erase_index(test, xa, 63);
+	__check_store_iter(xa, 64, 2, 1);
+	xa_erase_index(xa, 63);
 #endif
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_multi_find_1(struct kunit *test, unsigned int order)
+static noinline void check_multi_find_1(struct xarray *xa, unsigned order)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	unsigned long multi = 3 << order;
 	unsigned long next = 4 << order;
 	unsigned long index;
@@ -1236,17 +1189,15 @@ static noinline void check_multi_find_1(struct kunit *test, unsigned int order)
 	XA_BUG_ON(xa, xa_find_after(xa, &index, next, XA_PRESENT) != NULL);
 	XA_BUG_ON(xa, index != next);
 
-	xa_erase_index(test, xa, multi);
-	xa_erase_index(test, xa, next);
-	xa_erase_index(test, xa, next + 1);
+	xa_erase_index(xa, multi);
+	xa_erase_index(xa, next);
+	xa_erase_index(xa, next + 1);
 	XA_BUG_ON(xa, !xa_empty(xa));
 #endif
 }
 
-static noinline void check_multi_find_2(struct kunit *test)
+static noinline void check_multi_find_2(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1;
 	unsigned int i, j;
 	void *entry;
@@ -1260,19 +1211,17 @@ static noinline void check_multi_find_2(struct kunit *test)
 					GFP_KERNEL);
 			rcu_read_lock();
 			xas_for_each(&xas, entry, ULONG_MAX) {
-				xa_erase_index(test, xa, index);
+				xa_erase_index(xa, index);
 			}
 			rcu_read_unlock();
-			xa_erase_index(test, xa, index - 1);
+			xa_erase_index(xa, index - 1);
 			XA_BUG_ON(xa, !xa_empty(xa));
 		}
 	}
 }
 
-static noinline void check_multi_find_3(struct kunit *test)
+static noinline void check_multi_find_3(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order;
 
 	for (order = 5; order < order_limit; order++) {
@@ -1281,14 +1230,12 @@ static noinline void check_multi_find_3(struct kunit *test)
 		XA_BUG_ON(xa, !xa_empty(xa));
 		xa_store_order(xa, 0, order - 4, xa_mk_index(0), GFP_KERNEL);
 		XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT));
-		xa_erase_index(test, xa, 0);
+		xa_erase_index(xa, 0);
 	}
 }
 
-static noinline void check_find_1(struct kunit *test)
+static noinline void check_find_1(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i, j, k;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -1325,20 +1272,18 @@ static noinline void check_find_1(struct kunit *test)
 				else
 					XA_BUG_ON(xa, entry != NULL);
 			}
-			xa_erase_index(test, xa, j);
+			xa_erase_index(xa, j);
 			XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0));
 			XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
 		}
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 		XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_find_2(struct kunit *test)
+static noinline void check_find_2(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	void *entry;
 	unsigned long i, j, index;
 
@@ -1358,10 +1303,8 @@ static noinline void check_find_2(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_find_3(struct kunit *test)
+static noinline void check_find_3(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	unsigned long i, j, k;
 	void *entry;
@@ -1385,10 +1328,8 @@ static noinline void check_find_3(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_find_4(struct kunit *test)
+static noinline void check_find_4(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long index = 0;
 	void *entry;
 
@@ -1400,22 +1341,22 @@ static noinline void check_find_4(struct kunit *test)
 	entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT);
 	XA_BUG_ON(xa, entry);
 
-	xa_erase_index(test, xa, ULONG_MAX);
+	xa_erase_index(xa, ULONG_MAX);
 }
 
-static noinline void check_find(struct kunit *test)
+static noinline void check_find(struct xarray *xa)
 {
 	unsigned i;
 
-	check_find_1(test);
-	check_find_2(test);
-	check_find_3(test);
-	check_find_4(test);
+	check_find_1(xa);
+	check_find_2(xa);
+	check_find_3(xa);
+	check_find_4(xa);
 
 	for (i = 2; i < 10; i++)
-		check_multi_find_1(test, i);
-	check_multi_find_2(test);
-	check_multi_find_3(test);
+		check_multi_find_1(xa, i);
+	check_multi_find_2(xa);
+	check_multi_find_3(xa);
 }
 
 /* See find_swap_entry() in mm/shmem.c */
@@ -1441,10 +1382,8 @@ static noinline unsigned long xa_find_entry(struct xarray *xa, void *item)
 	return entry ? xas.xa_index : -1;
 }
 
-static noinline void check_find_entry(struct kunit *test)
+static noinline void check_find_entry(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 #ifdef CONFIG_XARRAY_MULTI
 	unsigned int order;
 	unsigned long offset, index;
@@ -1471,14 +1410,12 @@ static noinline void check_find_entry(struct kunit *test)
 	xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
 	XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
 	XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_index(ULONG_MAX)) != -1);
-	xa_erase_index(test, xa, ULONG_MAX);
+	xa_erase_index(xa, ULONG_MAX);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_pause(struct kunit *test)
+static noinline void check_pause(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 	unsigned int order;
@@ -1548,10 +1485,8 @@ static noinline void check_pause(struct kunit *test)
 
 }
 
-static noinline void check_move_tiny(struct kunit *test)
+static noinline void check_move_tiny(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -1568,14 +1503,12 @@ static noinline void check_move_tiny(struct kunit *test)
 	XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0));
 	XA_BUG_ON(xa, xas_prev(&xas) != NULL);
 	rcu_read_unlock();
-	xa_erase_index(test, xa, 0);
+	xa_erase_index(xa, 0);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_move_max(struct kunit *test)
+static noinline void check_move_max(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
@@ -1591,14 +1524,12 @@ static noinline void check_move_max(struct kunit *test)
 	XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL);
 	rcu_read_unlock();
 
-	xa_erase_index(test, xa, ULONG_MAX);
+	xa_erase_index(xa, ULONG_MAX);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_move_small(struct kunit *test, unsigned long idx)
+static noinline void check_move_small(struct xarray *xa, unsigned long idx)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	unsigned long i;
 
@@ -1640,15 +1571,13 @@ static noinline void check_move_small(struct kunit *test, unsigned long idx)
 	XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
 	rcu_read_unlock();
 
-	xa_erase_index(test, xa, 0);
-	xa_erase_index(test, xa, idx);
+	xa_erase_index(xa, 0);
+	xa_erase_index(xa, idx);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_move(struct kunit *test)
+static noinline void check_move(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, (1 << 16) - 1);
 	unsigned long i;
 
@@ -1675,7 +1604,7 @@ static noinline void check_move(struct kunit *test)
 	rcu_read_unlock();
 
 	for (i = (1 << 8); i < (1 << 15); i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 
 	i = xas.xa_index;
 
@@ -1706,17 +1635,17 @@ static noinline void check_move(struct kunit *test)
 
 	xa_destroy(xa);
 
-	check_move_tiny(test);
-	check_move_max(test);
+	check_move_tiny(xa);
+	check_move_max(xa);
 
 	for (i = 0; i < 16; i++)
-		check_move_small(test, 1UL << i);
+		check_move_small(xa, 1UL << i);
 
 	for (i = 2; i < 16; i++)
-		check_move_small(test, (1UL << i) - 1);
+		check_move_small(xa, (1UL << i) - 1);
 }
 
-static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa,
+static noinline void xa_store_many_order(struct xarray *xa,
 		unsigned long index, unsigned order)
 {
 	XA_STATE_ORDER(xas, xa, index, order);
@@ -1739,34 +1668,30 @@ static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa,
 	XA_BUG_ON(xa, xas_error(&xas));
 }
 
-static noinline void check_create_range_1(struct kunit *test,
+static noinline void check_create_range_1(struct xarray *xa,
 		unsigned long index, unsigned order)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i;
 
-	xa_store_many_order(test, xa, index, order);
+	xa_store_many_order(xa, index, order);
 	for (i = index; i < index + (1UL << order); i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_create_range_2(struct kunit *test, unsigned int order)
+static noinline void check_create_range_2(struct xarray *xa, unsigned order)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i;
 	unsigned long nr = 1UL << order;
 
 	for (i = 0; i < nr * nr; i += nr)
-		xa_store_many_order(test, xa, i, order);
+		xa_store_many_order(xa, i, order);
 	for (i = 0; i < nr * nr; i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_create_range_3(struct kunit *test)
+static noinline void check_create_range_3(void)
 {
 	XA_STATE(xas, NULL, 0);
 	xas_set_err(&xas, -EEXIST);
@@ -1774,11 +1699,9 @@ static noinline void check_create_range_3(struct kunit *test)
 	XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST);
 }
 
-static noinline void check_create_range_4(struct kunit *test,
+static noinline void check_create_range_4(struct xarray *xa,
 		unsigned long index, unsigned order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, index, order);
 	unsigned long base = xas.xa_index;
 	unsigned long i = 0;
@@ -1804,15 +1727,13 @@ static noinline void check_create_range_4(struct kunit *test,
 	XA_BUG_ON(xa, xas_error(&xas));
 
 	for (i = base; i < base + (1UL << order); i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_create_range_5(struct kunit *test,
+static noinline void check_create_range_5(struct xarray *xa,
 		unsigned long index, unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, index, order);
 	unsigned int i;
 
@@ -1829,46 +1750,44 @@ static noinline void check_create_range_5(struct kunit *test,
 	xa_destroy(xa);
 }
 
-static noinline void check_create_range(struct kunit *test)
+static noinline void check_create_range(struct xarray *xa)
 {
 	unsigned int order;
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1;
 
 	for (order = 0; order < max_order; order++) {
-		check_create_range_1(test, 0, order);
-		check_create_range_1(test, 1U << order, order);
-		check_create_range_1(test, 2U << order, order);
-		check_create_range_1(test, 3U << order, order);
-		check_create_range_1(test, 1U << 24, order);
+		check_create_range_1(xa, 0, order);
+		check_create_range_1(xa, 1U << order, order);
+		check_create_range_1(xa, 2U << order, order);
+		check_create_range_1(xa, 3U << order, order);
+		check_create_range_1(xa, 1U << 24, order);
 		if (order < 10)
-			check_create_range_2(test, order);
-
-		check_create_range_4(test, 0, order);
-		check_create_range_4(test, 1U << order, order);
-		check_create_range_4(test, 2U << order, order);
-		check_create_range_4(test, 3U << order, order);
-		check_create_range_4(test, 1U << 24, order);
-
-		check_create_range_4(test, 1, order);
-		check_create_range_4(test, (1U << order) + 1, order);
-		check_create_range_4(test, (2U << order) + 1, order);
-		check_create_range_4(test, (2U << order) - 1, order);
-		check_create_range_4(test, (3U << order) + 1, order);
-		check_create_range_4(test, (3U << order) - 1, order);
-		check_create_range_4(test, (1U << 24) + 1, order);
-
-		check_create_range_5(test, 0, order);
-		check_create_range_5(test, (1U << order), order);
+			check_create_range_2(xa, order);
+
+		check_create_range_4(xa, 0, order);
+		check_create_range_4(xa, 1U << order, order);
+		check_create_range_4(xa, 2U << order, order);
+		check_create_range_4(xa, 3U << order, order);
+		check_create_range_4(xa, 1U << 24, order);
+
+		check_create_range_4(xa, 1, order);
+		check_create_range_4(xa, (1U << order) + 1, order);
+		check_create_range_4(xa, (2U << order) + 1, order);
+		check_create_range_4(xa, (2U << order) - 1, order);
+		check_create_range_4(xa, (3U << order) + 1, order);
+		check_create_range_4(xa, (3U << order) - 1, order);
+		check_create_range_4(xa, (1U << 24) + 1, order);
+
+		check_create_range_5(xa, 0, order);
+		check_create_range_5(xa, (1U << order), order);
 	}
 
-	check_create_range_3(test);
+	check_create_range_3();
 }
 
-static noinline void __check_store_range(struct kunit *test, unsigned long first,
+static noinline void __check_store_range(struct xarray *xa, unsigned long first,
 		unsigned long last)
 {
-	struct xarray *xa = xa_param(test);
-
 #ifdef CONFIG_XARRAY_MULTI
 	xa_store_range(xa, first, last, xa_mk_index(first), GFP_KERNEL);
 
@@ -1883,28 +1802,26 @@ static noinline void __check_store_range(struct kunit *test, unsigned long first
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_store_range(struct kunit *test)
+static noinline void check_store_range(struct xarray *xa)
 {
 	unsigned long i, j;
 
 	for (i = 0; i < 128; i++) {
 		for (j = i; j < 128; j++) {
-			__check_store_range(test, i, j);
-			__check_store_range(test, 128 + i, 128 + j);
-			__check_store_range(test, 4095 + i, 4095 + j);
-			__check_store_range(test, 4096 + i, 4096 + j);
-			__check_store_range(test, 123456 + i, 123456 + j);
-			__check_store_range(test, (1 << 24) + i, (1 << 24) + j);
+			__check_store_range(xa, i, j);
+			__check_store_range(xa, 128 + i, 128 + j);
+			__check_store_range(xa, 4095 + i, 4095 + j);
+			__check_store_range(xa, 4096 + i, 4096 + j);
+			__check_store_range(xa, 123456 + i, 123456 + j);
+			__check_store_range(xa, (1 << 24) + i, (1 << 24) + j);
 		}
 	}
 }
 
 #ifdef CONFIG_XARRAY_MULTI
-static void check_split_1(struct kunit *test, unsigned long index,
+static void check_split_1(struct xarray *xa, unsigned long index,
 				unsigned int order, unsigned int new_order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, index, new_order);
 	unsigned int i, found;
 	void *entry;
@@ -1940,30 +1857,26 @@ static void check_split_1(struct kunit *test, unsigned long index,
 	xa_destroy(xa);
 }
 
-static noinline void check_split(struct kunit *test)
+static noinline void check_split(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order, new_order;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
 		for (new_order = 0; new_order < order; new_order++) {
-			check_split_1(test, 0, order, new_order);
-			check_split_1(test, 1UL << order, order, new_order);
-			check_split_1(test, 3UL << order, order, new_order);
+			check_split_1(xa, 0, order, new_order);
+			check_split_1(xa, 1UL << order, order, new_order);
+			check_split_1(xa, 3UL << order, order, new_order);
 		}
 	}
 }
 #else
-static void check_split(struct kunit *test) { }
+static void check_split(struct xarray *xa) { }
 #endif
 
-static void check_align_1(struct kunit *test, char *name)
+static void check_align_1(struct xarray *xa, char *name)
 {
-	struct xarray *xa = xa_param(test);
-
 	int i;
 	unsigned int id;
 	unsigned long index;
@@ -1983,10 +1896,8 @@ static void check_align_1(struct kunit *test, char *name)
  * We should always be able to store without allocating memory after
  * reserving a slot.
  */
-static void check_align_2(struct kunit *test, char *name)
+static void check_align_2(struct xarray *xa, char *name)
 {
-	struct xarray *xa = xa_param(test);
-
 	int i;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -2005,15 +1916,15 @@ static void check_align_2(struct kunit *test, char *name)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_align(struct kunit *test)
+static noinline void check_align(struct xarray *xa)
 {
 	char name[] = "Motorola 68000";
 
-	check_align_1(test, name);
-	check_align_1(test, name + 1);
-	check_align_1(test, name + 2);
-	check_align_1(test, name + 3);
-	check_align_2(test, name);
+	check_align_1(xa, name);
+	check_align_1(xa, name + 1);
+	check_align_1(xa, name + 2);
+	check_align_1(xa, name + 3);
+	check_align_2(xa, name);
 }
 
 static LIST_HEAD(shadow_nodes);
@@ -2029,7 +1940,7 @@ static void test_update_node(struct xa_node *node)
 	}
 }
 
-static noinline void shadow_remove(struct kunit *test, struct xarray *xa)
+static noinline void shadow_remove(struct xarray *xa)
 {
 	struct xa_node *node;
 
@@ -2043,17 +1954,8 @@ static noinline void shadow_remove(struct kunit *test, struct xarray *xa)
 	xa_unlock(xa);
 }
 
-struct workingset_testcase {
-	struct xarray *xa;
-	unsigned long index;
-};
-
-static noinline void check_workingset(struct kunit *test)
+static noinline void check_workingset(struct xarray *xa, unsigned long index)
 {
-	struct workingset_testcase tc = *(struct workingset_testcase *)test->param_value;
-	struct xarray *xa = tc.xa;
-	unsigned long index = tc.index;
-
 	XA_STATE(xas, xa, index);
 	xas_set_update(&xas, test_update_node);
 
@@ -2076,7 +1978,7 @@ static noinline void check_workingset(struct kunit *test)
 	xas_unlock(&xas);
 	XA_BUG_ON(xa, list_empty(&shadow_nodes));
 
-	shadow_remove(test, xa);
+	shadow_remove(xa);
 	XA_BUG_ON(xa, !list_empty(&shadow_nodes));
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
@@ -2085,11 +1987,9 @@ static noinline void check_workingset(struct kunit *test)
  * Check that the pointer / value / sibling entries are accounted the
  * way we expect them to be.
  */
-static noinline void check_account(struct kunit *test)
+static noinline void check_account(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order;
 
 	for (order = 1; order < 12; order++) {
@@ -2116,10 +2016,8 @@ static noinline void check_account(struct kunit *test)
 #endif
 }
 
-static noinline void check_get_order(struct kunit *test)
+static noinline void check_get_order(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
 	unsigned int order;
 	unsigned long i, j;
@@ -2138,10 +2036,8 @@ static noinline void check_get_order(struct kunit *test)
 	}
 }
 
-static noinline void check_xas_get_order(struct kunit *test)
+static noinline void check_xas_get_order(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
@@ -2173,10 +2069,8 @@ static noinline void check_xas_get_order(struct kunit *test)
 	}
 }
 
-static noinline void check_xas_conflict_get_order(struct kunit *test)
+static noinline void check_xas_conflict_get_order(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	void *entry;
@@ -2233,10 +2127,8 @@ static noinline void check_xas_conflict_get_order(struct kunit *test)
 }
 
 
-static noinline void check_destroy(struct kunit *test)
+static noinline void check_destroy(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long index;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -2269,59 +2161,52 @@ static noinline void check_destroy(struct kunit *test)
 }
 
 static DEFINE_XARRAY(array);
-static struct xarray *arrays[] = { &array };
-KUNIT_ARRAY_PARAM(array, arrays, NULL);
-
-static struct xarray *xa0s[] = { &xa0 };
-KUNIT_ARRAY_PARAM(xa0, xa0s, NULL);
-
-static struct workingset_testcase workingset_testcases[] = {
-	{ &array, 0 },
-	{ &array, 64 },
-	{ &array, 4096 },
-};
-KUNIT_ARRAY_PARAM(workingset, workingset_testcases, NULL);
-
-static struct kunit_case xarray_cases[] = {
-	KUNIT_CASE_PARAM(check_xa_err, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_retry, array_gen_params),
-	KUNIT_CASE_PARAM(check_xa_load, array_gen_params),
-	KUNIT_CASE_PARAM(check_xa_mark, array_gen_params),
-	KUNIT_CASE_PARAM(check_xa_shrink, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_erase, array_gen_params),
-	KUNIT_CASE_PARAM(check_insert, array_gen_params),
-	KUNIT_CASE_PARAM(check_cmpxchg, array_gen_params),
-	KUNIT_CASE_PARAM(check_cmpxchg_order, array_gen_params),
-	KUNIT_CASE_PARAM(check_reserve, array_gen_params),
-	KUNIT_CASE_PARAM(check_reserve, xa0_gen_params),
-	KUNIT_CASE_PARAM(check_multi_store, array_gen_params),
-	KUNIT_CASE_PARAM(check_multi_store_advanced, array_gen_params),
-	KUNIT_CASE_PARAM(check_get_order, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_get_order, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_conflict_get_order, array_gen_params),
-	KUNIT_CASE(check_xa_alloc),
-	KUNIT_CASE_PARAM(check_find, array_gen_params),
-	KUNIT_CASE_PARAM(check_find_entry, array_gen_params),
-	KUNIT_CASE_PARAM(check_pause, array_gen_params),
-	KUNIT_CASE_PARAM(check_account, array_gen_params),
-	KUNIT_CASE_PARAM(check_destroy, array_gen_params),
-	KUNIT_CASE_PARAM(check_move, array_gen_params),
-	KUNIT_CASE_PARAM(check_create_range, array_gen_params),
-	KUNIT_CASE_PARAM(check_store_range, array_gen_params),
-	KUNIT_CASE_PARAM(check_store_iter, array_gen_params),
-	KUNIT_CASE_PARAM(check_align, xa0_gen_params),
-	KUNIT_CASE_PARAM(check_split, array_gen_params),
-	KUNIT_CASE_PARAM(check_workingset, workingset_gen_params),
-	{},
-};
-
-static struct kunit_suite xarray_suite = {
-	.name = "xarray",
-	.test_cases = xarray_cases,
-};
-
-kunit_test_suite(xarray_suite);
 
+static int xarray_checks(void)
+{
+	check_xa_err(&array);
+	check_xas_retry(&array);
+	check_xa_load(&array);
+	check_xa_mark(&array);
+	check_xa_shrink(&array);
+	check_xas_erase(&array);
+	check_insert(&array);
+	check_cmpxchg(&array);
+	check_cmpxchg_order(&array);
+	check_reserve(&array);
+	check_reserve(&xa0);
+	check_multi_store(&array);
+	check_multi_store_advanced(&array);
+	check_get_order(&array);
+	check_xas_get_order(&array);
+	check_xas_conflict_get_order(&array);
+	check_xa_alloc();
+	check_find(&array);
+	check_find_entry(&array);
+	check_pause(&array);
+	check_account(&array);
+	check_destroy(&array);
+	check_move(&array);
+	check_create_range(&array);
+	check_store_range(&array);
+	check_store_iter(&array);
+	check_align(&xa0);
+	check_split(&array);
+
+	check_workingset(&array, 0);
+	check_workingset(&array, 64);
+	check_workingset(&array, 4096);
+
+	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
+	return (tests_run == tests_passed) ? 0 : -EINVAL;
+}
+
+static void xarray_exit(void)
+{
+}
+
+module_init(xarray_checks);
+module_exit(xarray_exit);
 MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
 MODULE_DESCRIPTION("XArray API test module");
 MODULE_LICENSE("GPL");

From e5b2a356dc8a88708d97bd47cca3b8f7ed7af6cb Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 30 Jan 2025 16:16:20 -0800
Subject: [PATCH 330/368] MAINTAINERS: include linux-mm for xarray maintenance

MM developers have an interest in the xarray code.

Cc: David Gow <davidgow@google.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Tamir Duberstein <tamird@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f52a004982c9f..ab7463b2f165c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -25729,6 +25729,7 @@ F:	arch/x86/entry/vdso/
 XARRAY
 M:	Matthew Wilcox <willy@infradead.org>
 L:	linux-fsdevel@vger.kernel.org
+L:	linux-mm@kvack.org
 S:	Supported
 F:	Documentation/core-api/xarray.rst
 F:	include/linux/idr.h

From 46ded709232344b5750a852747a8881763c721ab Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@broadcom.com>
Date: Wed, 29 Jan 2025 15:13:42 -0800
Subject: [PATCH 331/368] net: bcmgenet: Correct overlaying of PHY and MAC
 Wake-on-LAN

Some Wake-on-LAN modes such as WAKE_FILTER may only be supported by the MAC,
while others might be only supported by the PHY. Make sure that .get_wol()
returns the union of both rather than only that of the PHY if the PHY supports
Wake-on-LAN.

When disabling Wake-on-LAN, make sure that this is done at both the PHY
and MAC level, rather than doing an early return from the PHY driver.

Fixes: 7e400ff35cbe ("net: bcmgenet: Add support for PHY-based Wake-on-LAN")
Fixes: 9ee09edc05f2 ("net: bcmgenet: Properly overlay PHY and MAC Wake-on-LAN capabilities")
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20250129231342.35013-1-florian.fainelli@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/broadcom/genet/bcmgenet_wol.c   | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index 0715ea5bf13ed..3b082114f2e53 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct device *kdev = &priv->pdev->dev;
+	u32 phy_wolopts = 0;
 
-	if (dev->phydev)
+	if (dev->phydev) {
 		phy_ethtool_get_wol(dev->phydev, wol);
+		phy_wolopts = wol->wolopts;
+	}
 
 	/* MAC is not wake-up capable, return what the PHY does */
 	if (!device_can_wakeup(kdev))
@@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 	/* Overlay MAC capabilities with that of the PHY queried before */
 	wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
-	wol->wolopts = priv->wolopts;
-	memset(wol->sopass, 0, sizeof(wol->sopass));
+	wol->wolopts |= priv->wolopts;
 
+	/* Return the PHY configured magic password */
+	if (phy_wolopts & WAKE_MAGICSECURE)
+		return;
+
+	/* Otherwise the MAC one */
+	memset(wol->sopass, 0, sizeof(wol->sopass));
 	if (wol->wolopts & WAKE_MAGICSECURE)
 		memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));
 }
@@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	/* Try Wake-on-LAN from the PHY first */
 	if (dev->phydev) {
 		ret = phy_ethtool_set_wol(dev->phydev, wol);
-		if (ret != -EOPNOTSUPP)
+		if (ret != -EOPNOTSUPP && wol->wolopts)
 			return ret;
 	}
 

From c71a192976ded2f2f416d03c4f595cdd4478b825 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 Jan 2025 19:15:18 -0800
Subject: [PATCH 332/368] net: ipv6: fix dst refleaks in rpl, seg6 and ioam6
 lwtunnels

dst_cache_get() gives us a reference, we need to release it.

Discovered by the ioam6.sh test, kmemleak was recently fixed
to catch per-cpu memory leaks.

Fixes: 985ec6f5e623 ("net: ipv6: rpl_iptunnel: mitigate 2-realloc issue")
Fixes: 40475b63761a ("net: ipv6: seg6_iptunnel: mitigate 2-realloc issue")
Fixes: dce525185bc9 ("net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue")
Reviewed-by: Justin Iurman <justin.iurman@uliege.be>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250130031519.2716843-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ioam6_iptunnel.c | 5 +++--
 net/ipv6/rpl_iptunnel.c   | 6 ++++--
 net/ipv6/seg6_iptunnel.c  | 6 ++++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 28e5a89dc2557..3936c137a5727 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 
 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb_dst(skb), *cache_dst;
+	struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
 	struct in6_addr orig_daddr;
 	struct ioam6_lwt *ilwt;
 	int err = -EINVAL;
@@ -407,7 +407,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		cache_dst = ip6_route_output(net, NULL, &fl6);
 		if (cache_dst->error) {
 			err = cache_dst->error;
-			dst_release(cache_dst);
 			goto drop;
 		}
 
@@ -426,8 +425,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		return dst_output(net, sk, skb);
 	}
 out:
+	dst_release(cache_dst);
 	return dst->lwtstate->orig_output(net, sk, skb);
 drop:
+	dst_release(cache_dst);
 	kfree_skb(skb);
 	return err;
 }
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 7ba22d2f2bfef..9b7d035631154 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -232,7 +232,6 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		dst = ip6_route_output(net, NULL, &fl6);
 		if (dst->error) {
 			err = dst->error;
-			dst_release(dst);
 			goto drop;
 		}
 
@@ -251,6 +250,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	return dst_output(net, sk, skb);
 
 drop:
+	dst_release(dst);
 	kfree_skb(skb);
 	return err;
 }
@@ -269,8 +269,10 @@ static int rpl_input(struct sk_buff *skb)
 	local_bh_enable();
 
 	err = rpl_do_srh(skb, rlwt, dst);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		dst_release(dst);
 		goto drop;
+	}
 
 	if (!dst) {
 		ip6_route_input(skb);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 4bf937bfc2633..eacc4e91b48ef 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -482,8 +482,10 @@ static int seg6_input_core(struct net *net, struct sock *sk,
 	local_bh_enable();
 
 	err = seg6_do_srh(skb, dst);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		dst_release(dst);
 		goto drop;
+	}
 
 	if (!dst) {
 		ip6_route_input(skb);
@@ -571,7 +573,6 @@ static int seg6_output_core(struct net *net, struct sock *sk,
 		dst = ip6_route_output(net, NULL, &fl6);
 		if (dst->error) {
 			err = dst->error;
-			dst_release(dst);
 			goto drop;
 		}
 
@@ -593,6 +594,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
 
 	return dst_output(net, sk, skb);
 drop:
+	dst_release(dst);
 	kfree_skb(skb);
 	return err;
 }

From 92191dd1073088753821b862b791dcc83e558e07 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 Jan 2025 19:15:19 -0800
Subject: [PATCH 333/368] net: ipv6: fix dst ref loops in rpl, seg6 and ioam6
 lwtunnels

Some lwtunnels have a dst cache for post-transformation dst.
If the packet destination did not change we may end up recording
a reference to the lwtunnel in its own cache, and the lwtunnel
state will never be freed.

Discovered by the ioam6.sh test, kmemleak was recently fixed
to catch per-cpu memory leaks. I'm not sure if rpl and seg6
can actually hit this, but in principle I don't see why not.

Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel")
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250130031519.2716843-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ioam6_iptunnel.c | 9 ++++++---
 net/ipv6/rpl_iptunnel.c   | 9 ++++++---
 net/ipv6/seg6_iptunnel.c  | 9 ++++++---
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 3936c137a5727..2c383c12a4315 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -410,9 +410,12 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			goto drop;
 		}
 
-		local_bh_disable();
-		dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
-		local_bh_enable();
+		/* cache only if we don't create a dst reference loop */
+		if (dst->lwtstate != cache_dst->lwtstate) {
+			local_bh_disable();
+			dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
+			local_bh_enable();
+		}
 
 		err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
 		if (unlikely(err))
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 9b7d035631154..0ac4283acdf20 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -235,9 +235,12 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			goto drop;
 		}
 
-		local_bh_disable();
-		dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
-		local_bh_enable();
+		/* cache only if we don't create a dst reference loop */
+		if (orig_dst->lwtstate != dst->lwtstate) {
+			local_bh_disable();
+			dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
+			local_bh_enable();
+		}
 
 		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 		if (unlikely(err))
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index eacc4e91b48ef..33833b2064c07 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -576,9 +576,12 @@ static int seg6_output_core(struct net *net, struct sock *sk,
 			goto drop;
 		}
 
-		local_bh_disable();
-		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
-		local_bh_enable();
+		/* cache only if we don't create a dst reference loop */
+		if (orig_dst->lwtstate != dst->lwtstate) {
+			local_bh_disable();
+			dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+			local_bh_enable();
+		}
 
 		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 		if (unlikely(err))

From a8aa6a6ddce9b5585f2b74f27f3feea1427fb4e7 Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiashengjiangcool@gmail.com>
Date: Fri, 31 Jan 2025 01:38:32 +0000
Subject: [PATCH 334/368] ice: Add check for devm_kzalloc()

Add check for the return value of devm_kzalloc() to guarantee the success
of allocation.

Fixes: 42c2eb6b1f43 ("ice: Implement devlink-rate API")
Signed-off-by: Jiasheng Jiang <jiashengjiangcool@gmail.com>
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Link: https://patch.msgid.link/20250131013832.24805-1-jiashengjiangcool@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/devlink/devlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
index d116e2b10bcea..dbdb83567364c 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
@@ -981,6 +981,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv
 
 	/* preallocate memory for ice_sched_node */
 	node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
 	*priv = node;
 
 	return 0;

From 3f1baa91a1fdf3de9dbad4bd615b35fab347874b Mon Sep 17 00:00:00 2001
From: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
Date: Fri, 31 Jan 2025 09:53:41 +0530
Subject: [PATCH 335/368] vmxnet3: Fix tx queue race condition with XDP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If XDP traffic runs on a CPU which is greater than or equal to
the number of the Tx queues of the NIC, then vmxnet3_xdp_get_tq()
always picks up queue 0 for transmission as it uses reciprocal scale
instead of simple modulo operation.

vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above
returned queue without any locking which can lead to race conditions
when multiple XDP xmits run in parallel on different CPUs.

This patch uses a simple modulo scheme when the current CPU equals or
exceeds the number of Tx queues on the NIC. It also adds locking in
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.

Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/vmxnet3/vmxnet3_xdp.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c
index 1341374a4588a..616ecc38d1726 100644
--- a/drivers/net/vmxnet3/vmxnet3_xdp.c
+++ b/drivers/net/vmxnet3/vmxnet3_xdp.c
@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
 	if (likely(cpu < tq_number))
 		tq = &adapter->tx_queue[cpu];
 	else
-		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
+		tq = &adapter->tx_queue[cpu % tq_number];
 
 	return tq;
 }
@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 	u32 buf_size;
 	u32 dw2;
 
+	spin_lock_irq(&tq->tx_lock);
 	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
 	dw2 |= xdpf->len;
 	ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 
 	if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
 		tq->stats.tx_ring_full++;
+		spin_unlock_irq(&tq->tx_lock);
 		return -ENOSPC;
 	}
 
@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 					       xdpf->data, buf_size,
 					       DMA_TO_DEVICE);
-		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
+			spin_unlock_irq(&tq->tx_lock);
 			return -EFAULT;
+		}
 		tbi->map_type |= VMXNET3_MAP_SINGLE;
 	} else { /* XDP buffer from page pool */
 		page = virt_to_page(xdpf->data);
@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 	dma_wmb();
 	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
 						  VMXNET3_TXD_GEN);
+	spin_unlock_irq(&tq->tx_lock);
 
 	/* No need to handle the case when tx_num_deferred doesn't reach
 	 * threshold. Backend driver at hypervisor side will poll and reset
@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
 	struct vmxnet3_tx_queue *tq;
+	struct netdev_queue *nq;
 	int i;
 
 	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 	if (tq->stopped)
 		return -ENETDOWN;
 
+	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+	__netif_tx_lock(nq, smp_processor_id());
 	for (i = 0; i < n; i++) {
 		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
 			tq->stats.xdp_xmit_err++;
@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 		}
 	}
 	tq->stats.xdp_xmit += i;
+	__netif_tx_unlock(nq);
 
 	return i;
 }

From 2c4627c8ced77855b106c7104ecab70837d53799 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sun, 2 Feb 2025 10:43:02 -0600
Subject: [PATCH 336/368] tools/power turbostat: version 2025.02.02

Summary of Changes since 2024.11.30:

Fix regression in 2023.11.07 that affinitized forked child
in one-shot mode.

Harden one-shot mode against hotplug online/offline

Enable RAPL SysWatt column by default.

Add initial PTL, CWF platform support.

Harden initial PMT code in response to early use.

Enable first built-in PMT counter: CWF c1e residency

Refuse to run on unsupported platforms without --force,
to encourage updating to a version that supports the system,
and to avoid not-so-useful measurement results.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 364a44a7d7aee..8d5011a0bf60d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -9559,7 +9559,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-	fprintf(outf, "turbostat version 2025.01.14 - Len Brown <lenb@kernel.org>\n");
+	fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048

From 2014c95afecee3e76ca4a56956a936e23283f05b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Feb 2025 15:39:26 -0800
Subject: [PATCH 337/368] Linux 6.14-rc1

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4117cc79748bf..9e0d63d9d94b9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 6
-PATCHLEVEL = 13
+PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*

From e4d4648eac8b4ef39f412d07715eb26f1ccd7342 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Tue, 28 Jan 2025 00:02:01 +0300
Subject: [PATCH 338/368] platform/x86: ideapad-laptop: pass a correct pointer
 to the driver data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

devm_platform_profile_register() expects a pointer to the private driver
data but instead an address of the pointer variable is passed due to a
typo. This leads to crashes later:

BUG: unable to handle page fault for address: 00000000fe0d0044
PGD 0 P4D 0
Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 6 UID: 0 PID: 1284 Comm: tuned Tainted: G        W          6.13.0+ #7
Tainted: [W]=WARN
Hardware name: LENOVO 21D0/LNVNB161216, BIOS J6CN45WW 03/17/2023
RIP: 0010:__mutex_lock.constprop.0+0x6bf/0x7f0
Call Trace:
 <TASK>
 dytc_profile_set+0x4a/0x140 [ideapad_laptop]
 _store_and_notify+0x13/0x40 [platform_profile]
 class_for_each_device+0x145/0x180
 platform_profile_store+0xc0/0x130 [platform_profile]
 kernfs_fop_write_iter+0x13e/0x1f0
 vfs_write+0x290/0x450
 ksys_write+0x6c/0xe0
 do_syscall_64+0x82/0x160
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Found by Linux Verification Center (linuxtesting.org).

Fixes: 249c576f0f9d ("ACPI: platform_profile: Let drivers set drvdata to the class device")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Reviewed-by: Kurt Borja <kuurtb@gmail.com>
Link: https://lore.kernel.org/r/20250127210202.568691-1-pchelkin@ispras.ru
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/ideapad-laptop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index dfb5d4b8c0465..30bd366d7b58a 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1121,7 +1121,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv)
 
 	/* Create platform_profile structure and register */
 	priv->dytc->ppdev = devm_platform_profile_register(&priv->platform_device->dev,
-							   "ideapad-laptop", &priv->dytc,
+							   "ideapad-laptop", priv->dytc,
 							   &dytc_profile_ops);
 	if (IS_ERR(priv->dytc->ppdev)) {
 		err = PTR_ERR(priv->dytc->ppdev);

From 583ef25bb2a094813351a727ddec38b35a15b9f8 Mon Sep 17 00:00:00 2001
From: Dmitry Kandybka <d.kandybka@gmail.com>
Date: Fri, 24 Jan 2025 01:07:39 +0300
Subject: [PATCH 339/368] platform/x86/intel: pmc: fix ltr decode in
 pmc_core_ltr_show()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In pmc_core_ltr_show(), promote 'val' to 'u64' to avoid possible integer
overflow. Values (10 bit) are multiplied by the scale, the result of
expression is in a range from 1 to 34,326,183,936 which is bigger than
UINT32_MAX. Compile tested only.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Signed-off-by: Dmitry Kandybka <d.kandybka@gmail.com>
Reviewed-by: Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20250123220739.68087-1-d.kandybka@gmail.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/pmc/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 10f04b9441174..1ee0fb5f8250b 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -626,8 +626,8 @@ static u32 convert_ltr_scale(u32 val)
 static int pmc_core_ltr_show(struct seq_file *s, void *unused)
 {
 	struct pmc_dev *pmcdev = s->private;
-	u64 decoded_snoop_ltr, decoded_non_snoop_ltr;
-	u32 ltr_raw_data, scale, val;
+	u64 decoded_snoop_ltr, decoded_non_snoop_ltr, val;
+	u32 ltr_raw_data, scale;
 	u16 snoop_ltr, nonsnoop_ltr;
 	unsigned int i, index, ltr_index = 0;
 

From e0efe83ed325277bb70f9435d4d9fc70bebdcca8 Mon Sep 17 00:00:00 2001
From: Lenny Szubowicz <lszubowi@redhat.com>
Date: Thu, 30 Jan 2025 16:57:54 -0500
Subject: [PATCH 340/368] tg3: Disable tg3 PCIe AER on system reboot

Disable PCIe AER on the tg3 device on system reboot on a limited
list of Dell PowerEdge systems. This prevents a fatal PCIe AER event
on the tg3 device during the ACPI _PTS (prepare to sleep) method for
S5 on those systems. The _PTS is invoked by acpi_enter_sleep_state_prep()
as part of the kernel's reboot sequence as a result of commit
38f34dba806a ("PM: ACPI: reboot: Reinstate S5 for reboot").

There was an earlier fix for this problem by commit 2ca1c94ce0b6
("tg3: Disable tg3 device on system reboot to avoid triggering AER").
But it was discovered that this earlier fix caused a reboot hang
when some Dell PowerEdge servers were booted via ipxe. To address
this reboot hang, the earlier fix was essentially reverted by commit
9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF").
This re-exposed the tg3 PCIe AER on reboot problem.

This fix is not an ideal solution because the root cause of the AER
is in system firmware. Instead, it's a targeted work-around in the
tg3 driver.

Note also that the PCIe AER must be disabled on the tg3 device even
if the system is configured to use "firmware first" error handling.

V3:
   - Fix sparse warning on improper comparison of pdev->current_state
   - Adhere to netdev comment style

Fixes: 9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF")
Signed-off-by: Lenny Szubowicz <lszubowi@redhat.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 58 +++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 1c94bf1db7186..d9d675f1ebfe9 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -55,6 +55,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/crc32poly.h>
+#include <linux/dmi.h>
 
 #include <net/checksum.h>
 #include <net/gso.h>
@@ -18212,6 +18213,50 @@ static int tg3_resume(struct device *device)
 
 static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume);
 
+/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal
+ * PCIe AER event on the tg3 device if the tg3 device is not, or cannot
+ * be, powered down.
+ */
+static const struct dmi_system_id tg3_restart_aer_quirk_table[] = {
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"),
+		},
+	},
+	{}
+};
+
 static void tg3_shutdown(struct pci_dev *pdev)
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
@@ -18228,6 +18273,19 @@ static void tg3_shutdown(struct pci_dev *pdev)
 
 	if (system_state == SYSTEM_POWER_OFF)
 		tg3_power_down(tp);
+	else if (system_state == SYSTEM_RESTART &&
+		 dmi_first_match(tg3_restart_aer_quirk_table) &&
+		 pdev->current_state != PCI_D3cold &&
+		 pdev->current_state != PCI_UNKNOWN) {
+		/* Disable PCIe AER on the tg3 to avoid a fatal
+		 * error during this system restart.
+		 */
+		pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL,
+					   PCI_EXP_DEVCTL_CERE |
+					   PCI_EXP_DEVCTL_NFERE |
+					   PCI_EXP_DEVCTL_FERE |
+					   PCI_EXP_DEVCTL_URRE);
+	}
 
 	rtnl_unlock();
 

From 235174b2bed88501fda689c113c55737f99332d8 Mon Sep 17 00:00:00 2001
From: Yan Zhai <yan@cloudflare.com>
Date: Fri, 31 Jan 2025 00:31:39 -0800
Subject: [PATCH 341/368] udp: gso: do not drop small packets when PMTU reduces

Commit 4094871db1d6 ("udp: only do GSO if # of segs > 1") avoided GSO
for small packets. But the kernel currently dismisses GSO requests only
after checking MTU/PMTU on gso_size. This means any packets, regardless
of their payload sizes, could be dropped when PMTU becomes smaller than
requested gso_size. We encountered this issue in production and it
caused a reliability problem: new QUIC connections could not be
established until the PMTU cache expired, while non-GSO sockets still
worked fine at the same time.

Ideally, do not check any GSO related constraints when payload size is
smaller than requested gso_size, and return EMSGSIZE instead of EINVAL
on MTU/PMTU check failure to be more specific on the error cause.

Fixes: 4094871db1d6 ("udp: only do GSO if # of segs > 1")
Signed-off-by: Yan Zhai <yan@cloudflare.com>
Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c                       |  4 ++--
 net/ipv6/udp.c                       |  4 ++--
 tools/testing/selftests/net/udpgso.c | 26 ++++++++++++++++++++++++++
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c472c9a57cf68..a9bb9ce5438ea 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
 		const int hlen = skb_network_header_len(skb) +
 				 sizeof(struct udphdr);
 
-		if (hlen + cork->gso_size > cork->fragsize) {
+		if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
 			kfree_skb(skb);
-			return -EINVAL;
+			return -EMSGSIZE;
 		}
 		if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
 			kfree_skb(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6671daa67f4fa..c6ea438b5c758 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
 		const int hlen = skb_network_header_len(skb) +
 				 sizeof(struct udphdr);
 
-		if (hlen + cork->gso_size > cork->fragsize) {
+		if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
 			kfree_skb(skb);
-			return -EINVAL;
+			return -EMSGSIZE;
 		}
 		if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
 			kfree_skb(skb);
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 3f2fca02fec53..36ff28af4b190 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -102,6 +102,19 @@ struct testcase testcases_v4[] = {
 		.gso_len = CONST_MSS_V4,
 		.r_num_mss = 1,
 	},
+	{
+		/* datalen <= MSS < gso_len: will fall back to no GSO */
+		.tlen = CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4 + 1,
+		.r_num_mss = 0,
+		.r_len_last = CONST_MSS_V4,
+	},
+	{
+		/* MSS < datalen < gso_len: fail */
+		.tlen = CONST_MSS_V4 + 1,
+		.gso_len = CONST_MSS_V4 + 2,
+		.tfail = true,
+	},
 	{
 		/* send a single MSS + 1B */
 		.tlen = CONST_MSS_V4 + 1,
@@ -205,6 +218,19 @@ struct testcase testcases_v6[] = {
 		.gso_len = CONST_MSS_V6,
 		.r_num_mss = 1,
 	},
+	{
+		/* datalen <= MSS < gso_len: will fall back to no GSO */
+		.tlen = CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6 + 1,
+		.r_num_mss = 0,
+		.r_len_last = CONST_MSS_V6,
+	},
+	{
+		/* MSS < datalen < gso_len: fail */
+		.tlen = CONST_MSS_V6 + 1,
+		.gso_len = CONST_MSS_V6 + 2,
+		.tfail = true
+	},
 	{
 		/* send a single MSS + 1B */
 		.tlen = CONST_MSS_V6 + 1,

From 902e09c8acde117b00369521f54df817a983d4ab Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 3 Feb 2025 16:16:09 -0500
Subject: [PATCH 342/368] fix braino in "9p: fix ->rename_sem exclusion"

->d_op can bloody well be NULL

Fucked-up-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: 30d61efe118c "9p: fix ->rename_sem exclusion"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 903142b324e98..8a605681b26ff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2967,11 +2967,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias)
 		goto out_err;
 	m2 = &alias->d_parent->d_inode->i_rwsem;
 out_unalias:
-	if (alias->d_op->d_unalias_trylock &&
+	if (alias->d_op && alias->d_op->d_unalias_trylock &&
 	    !alias->d_op->d_unalias_trylock(alias))
 		goto out_err;
 	__d_move(alias, dentry, false);
-	if (alias->d_op->d_unalias_unlock)
+	if (alias->d_op && alias->d_op->d_unalias_unlock)
 		alias->d_op->d_unalias_unlock(alias);
 	ret = 0;
 out_err:

From 3a4e7193ec37ee2476ce726589de4495a066b565 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 1 Feb 2025 16:50:24 -0800
Subject: [PATCH 343/368] MAINTAINERS: list openvswitch docs under its entry

Submissions to the docs seem to not get properly CCed.

Acked-by: Ilya Maximets <i.maximets@ovn.org>
Link: https://patch.msgid.link/20250202005024.964262-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d1086e53a3176..c7b8c6535a1e2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17706,6 +17706,7 @@ L:	netdev@vger.kernel.org
 L:	dev@openvswitch.org
 S:	Maintained
 W:	http://openvswitch.org
+F:	Documentation/networking/openvswitch.rst
 F:	include/uapi/linux/openvswitch.h
 F:	net/openvswitch/
 F:	tools/testing/selftests/net/openvswitch/

From 4d896b35394144c246daaeb5280a015a630958e7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 1 Feb 2025 17:47:26 -0800
Subject: [PATCH 344/368] MAINTAINERS: add Kuniyuki Iwashima to TCP reviewers

List Kuniyuki as an official TCP reviewer.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250202014728.1005003-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c7b8c6535a1e2..48677d61c97bd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16614,6 +16614,7 @@ F:	tools/testing/selftests/net/mptcp/
 NETWORKING [TCP]
 M:	Eric Dumazet <edumazet@google.com>
 M:	Neal Cardwell <ncardwell@google.com>
+R:	Kuniyuki Iwashima <kuniyu@amazon.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/net_cachelines/tcp_sock.rst

From ae0585b04ab741b536b0db20c12baf24bf7118d2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 1 Feb 2025 17:47:27 -0800
Subject: [PATCH 345/368] MAINTAINERS: add a general entry for BSD sockets

Create a MAINTAINERS entry for BSD sockets. List the top 3
reviewers as maintainers. The entry is meant to cover core
socket code (of which there isn't much) but also reviews
of any new socket families.

Reviewed-by: Simon Horman <horms@kernel.org>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250202014728.1005003-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 48677d61c97bd..438d85bb97a22 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16642,6 +16642,22 @@ F:	include/net/tls.h
 F:	include/uapi/linux/tls.h
 F:	net/tls/*
 
+NETWORKING [SOCKETS]
+M:	Eric Dumazet <edumazet@google.com>
+M:	Kuniyuki Iwashima <kuniyu@amazon.com>
+M:	Paolo Abeni <pabeni@redhat.com>
+M:	Willem de Bruijn <willemb@google.com>
+S:	Maintained
+F:	include/linux/sock_diag.h
+F:	include/linux/socket.h
+F:	include/linux/sockptr.h
+F:	include/net/sock.h
+F:	include/net/sock_reuseport.h
+F:	include/uapi/linux/socket.h
+F:	net/core/*sock*
+F:	net/core/scm.c
+F:	net/socket.c
+
 NETXEN (1/10) GbE SUPPORT
 M:	Manish Chopra <manishc@marvell.com>
 M:	Rahul Verma <rahulv@marvell.com>

From 8a2e22f665a0b5c212057031e94b75cfdc11a4a6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 1 Feb 2025 17:47:28 -0800
Subject: [PATCH 346/368] MAINTAINERS: add entry for UNIX sockets

Add a MAINTAINERS entry for UNIX socket, Kuniyuki has been
the de-facto maintainer of this code for a while.

Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250202014728.1005003-4-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 438d85bb97a22..74b09dad46626 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16658,6 +16658,15 @@ F:	net/core/*sock*
 F:	net/core/scm.c
 F:	net/socket.c
 
+NETWORKING [UNIX SOCKETS]
+M:	Kuniyuki Iwashima <kuniyu@amazon.com>
+S:	Maintained
+F:	include/net/af_unix.h
+F:	include/net/netns/unix.h
+F:	include/uapi/linux/unix_diag.h
+F:	net/unix/
+F:	tools/testing/selftests/net/af_unix/
+
 NETXEN (1/10) GbE SUPPORT
 M:	Manish Chopra <manishc@marvell.com>
 M:	Rahul Verma <rahulv@marvell.com>

From 1b0332a42656b798bea867631d739de023633ec6 Mon Sep 17 00:00:00 2001
From: Yu-Chun Lin <eleanor15x@gmail.com>
Date: Thu, 30 Jan 2025 22:48:49 +0800
Subject: [PATCH 347/368] kthread: Fix return value on kzalloc() failure in
 kthread_affine_preferred()

kthread_affine_preferred() incorrectly returns 0 instead of -ENOMEM
when kzalloc() fails. Return 'ret' to ensure the correct error code is
propagated.

Fixes: 4d13f4304fa4 ("kthread: Implement preferred affinity")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202501301528.t0cZVbnq-lkp@intel.com/
Signed-off-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 kernel/kthread.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4005b13ebd7ff..5dc5b0d7238e8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -859,7 +859,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
 	struct kthread *kthread = to_kthread(p);
 	cpumask_var_t affinity;
 	unsigned long flags;
-	int ret;
+	int ret = 0;
 
 	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) {
 		WARN_ON(1);
@@ -892,7 +892,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
 out:
 	free_cpumask_var(affinity);
 
-	return 0;
+	return ret;
 }
 
 /*

From 244f8aa46fa9e2f4ea5fe0e04988b395d5e30fc7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 31 Jan 2025 17:30:37 -0800
Subject: [PATCH 348/368] ethtool: rss: fix hiding unsupported fields in dumps

Commit ec6e57beaf8b ("ethtool: rss: don't report key if device
doesn't support it") intended to stop reporting key fields for
additional rss contexts if device has a global hashing key.

Later we added dump support and the filtering wasn't properly
added there. So we end up reporting the key fields in dumps
but not in dos:

  # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --do rss-get \
		--json '{"header": {"dev-index":2}, "context": 1 }'
  {
     "header": { ... },
     "context": 1,
     "indir": [0, 1, 2, 3, ...]
  }

  # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --dump rss-get
  [
     ... snip context 0 ...
     { "header": { ... },
       "context": 1,
       "indir": [0, 1, 2, 3, ...],
 ->    "input_xfrm": 255,
 ->    "hfunc": 1,
 ->    "hkey": "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
     }
  ]

Hide these fields correctly.

The drivers/net/hw/rss_ctx.py selftest catches this when run on
a device with single key, already:

  # Check| At /root/./ksft-net-drv/drivers/net/hw/rss_ctx.py, line 381, in test_rss_context_dump:
  # Check|     ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
  # Check failed {0} == {0} key is all zero
  not ok 8 rss_ctx.test_rss_context_dump

Fixes: f6122900f4e2 ("ethtool: rss: support dumping RSS contexts")
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20250201013040.725123-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/rss.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 7cb106b590aba..58df9ad02ce8a 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
 	u32 total_size, indir_bytes;
 	u8 *rss_config;
 
+	data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;
+
 	ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
 	if (!ctx)
 		return -ENOENT;
@@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
 		if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
 			return -EOPNOTSUPP;
 
-		data->no_key_fields = !ops->rxfh_per_ctx_key;
 		return rss_prepare_ctx(request, dev, data, info);
 	}
 

From 2b91cc1214b165c25ac9b0885db89a0d3224028a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 31 Jan 2025 17:30:38 -0800
Subject: [PATCH 349/368] ethtool: ntuple: fix rss + ring_cookie check

The info.flow_type is for RXFH commands, ntuple flow_type is inside
the flow spec. The check currently does nothing, as info.flow_type
is 0 (or even uninitialized by user space) for ETHTOOL_SRXCLSRLINS.

Fixes: 9e43ad7a1ede ("net: ethtool: only allow set_rxnfc with rss + ring_cookie if driver opts in")
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20250201013040.725123-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 34bee42e12470..7609ce2b2c5e2 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
 		return rc;
 
 	/* Nonzero ring with RSS only makes sense if NIC adds them together */
-	if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS &&
+	if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS &&
 	    !ops->cap_rss_rxnfc_adds &&
 	    ethtool_get_flow_spec_ring(info.fs.ring_cookie))
 		return -EINVAL;

From de379dfd9ada2995699052f4a1ecebe5d8f8d70f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 31 Jan 2025 17:30:39 -0800
Subject: [PATCH 350/368] selftests: drv-net: rss_ctx: add missing cleanup in
 queue reconfigure

Commit under Fixes adds ntuple rules but never deletes them.

Fixes: 29a4bc1fe961 ("selftest: extend test_rss_context_queue_reconfigure for action addition")
Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20250201013040.725123-4-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/hw/rss_ctx.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index ca8a7edff3dda..27e24e20749ff 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
         try:
             # this targets queue 4, which doesn't exist
             ntuple2 = ethtool_create(cfg, "-N", flow)
+            defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
         except CmdExitFailure:
             pass
         else:
@@ -260,6 +261,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
         ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
         # ntuple rule therefore targets queues 1 and 3
         ntuple2 = ethtool_create(cfg, "-N", flow)
+        defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
         # should replace existing filter
         ksft_eq(ntuple, ntuple2)
         _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3),

From c3da585509aeb8476886adf75a266c81a9b0df6c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 31 Jan 2025 17:30:40 -0800
Subject: [PATCH 351/368] selftests: drv-net: rss_ctx: don't fail reconfigure
 test if queue offset not supported

The vast majority of drivers do not support queue offset.
Simply return if the rss context + queue ntuple fails.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20250201013040.725123-5-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/hw/rss_ctx.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 27e24e20749ff..319aaa004c407 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -260,7 +260,12 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
         # change the table to target queues 0 and 2
         ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
         # ntuple rule therefore targets queues 1 and 3
-        ntuple2 = ethtool_create(cfg, "-N", flow)
+        try:
+            ntuple2 = ethtool_create(cfg, "-N", flow)
+        except CmdExitFailure:
+            ksft_pr("Driver does not support rss + queue offset")
+            return
+
         defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
         # should replace existing filter
         ksft_eq(ntuple, ntuple2)

From d3ed6dee73c560fad0a8e152c8e233b3fb3a2e44 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Sat, 1 Feb 2025 19:02:51 +0100
Subject: [PATCH 352/368] net: harmonize tstats and dstats

After the blamed commits below, some UDP tunnels use dstats for
accounting. On the xmit path, all the UDP-based tunnels end up
using iptunnel_xmit_stats() for stats accounting, and the latter
assumes the relevant (tunnel) network device uses tstats.

The end result is some 'funny' stat report for the mentioned UDP
tunnel, e.g. when no packet is actually dropped and a bunch of
packets are transmitted:

gnv2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue \
		state UNKNOWN mode DEFAULT group default qlen 1000
    link/ether ee:7d:09:87:90:ea brd ff:ff:ff:ff:ff:ff
    RX:  bytes packets errors dropped  missed   mcast
         14916      23      0      15       0       0
    TX:  bytes packets errors dropped carrier collsns
             0    1566      0       0       0       0

Address the issue by ensuring the same binary layout for the overlapping
fields of dstats and tstats. While this solution is a bit hackish, it is
smaller and has no performance pitfalls compared to the alternatives,
i.e. supporting both dstat and tstat in iptunnel_xmit_stats() or
reverting the blamed commit.

With time we should possibly move all the IP-based tunnels (and virtual
devices) to dstats.

Fixes: c77200c07491 ("bareudp: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Fixes: 6fa6de302246 ("geneve: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Fixes: be226352e8dc ("vxlan: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Link: https://patch.msgid.link/2e1c444cf0f63ae472baff29862c4c869be17031.1738432804.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2a59034a5fa2f..03bb584c62cf8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2904,9 +2904,9 @@ struct pcpu_sw_netstats {
 struct pcpu_dstats {
 	u64_stats_t		rx_packets;
 	u64_stats_t		rx_bytes;
-	u64_stats_t		rx_drops;
 	u64_stats_t		tx_packets;
 	u64_stats_t		tx_bytes;
+	u64_stats_t		rx_drops;
 	u64_stats_t		tx_drops;
 	struct u64_stats_sync	syncp;
 } __aligned(8 * sizeof(u64));
diff --git a/net/core/dev.c b/net/core/dev.c
index c0021cbd28fc1..b91658e8aedb4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11286,6 +11286,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 	const struct net_device_ops *ops = dev->netdev_ops;
 	const struct net_device_core_stats __percpu *p;
 
+	/*
+	 * IPv{4,6} and udp tunnels share common stat helpers and use
+	 * different stat type (NETDEV_PCPU_STAT_TSTATS vs
+	 * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent.
+	 */
+	BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) !=
+		     offsetof(struct pcpu_dstats, rx_bytes));
+	BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) !=
+		     offsetof(struct pcpu_dstats, rx_packets));
+	BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) !=
+		     offsetof(struct pcpu_dstats, tx_bytes));
+	BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) !=
+		     offsetof(struct pcpu_dstats, tx_packets));
+
 	if (ops->ndo_get_stats64) {
 		memset(storage, 0, sizeof(*storage));
 		ops->ndo_get_stats64(dev, storage);

From a787ab73e2e43c0a3df10bc8d9b9b7a679129d49 Mon Sep 17 00:00:00 2001
From: Jithu Joseph <jithu.joseph@intel.com>
Date: Fri, 31 Jan 2025 12:53:15 -0800
Subject: [PATCH 353/368] platform/x86/intel/ifs: Update documentation with
 image download path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The documentation previously listed the path to download In Field Scan
(IFS) test images as "TBD".

Update the documentation to include the correct image download
location. Also move the download link to the appropriate section within
the documentation.

Reported-by: Anisse Astier <anisse@astier.eu>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Link: https://lore.kernel.org/r/20250131205315.1585663-1-jithu.joseph@intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/ifs/ifs.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h
index 5c3c0dfa1bf83..f369fb0d3d82f 100644
--- a/drivers/platform/x86/intel/ifs/ifs.h
+++ b/drivers/platform/x86/intel/ifs/ifs.h
@@ -23,12 +23,14 @@
  * IFS Image
  * ---------
  *
- * Intel provides a firmware file containing the scan tests via
- * github [#f1]_.  Similar to microcode there is a separate file for each
+ * Intel provides firmware files containing the scan tests via the webpage [#f1]_.
+ * Look under "In-Field Scan Test Images Download" section towards the
+ * end of the page. Similar to microcode, there are separate files for each
  * family-model-stepping. IFS Images are not applicable for some test types.
  * Wherever applicable the sysfs directory would provide a "current_batch" file
  * (see below) for loading the image.
  *
+ * .. [#f1] https://intel.com/InFieldScan
  *
  * IFS Image Loading
  * -----------------
@@ -125,9 +127,6 @@
  * 2) Hardware allows for some number of cores to be tested in parallel.
  * The driver does not make use of this, it only tests one core at a time.
  *
- * .. [#f1] https://github.com/intel/TBD
- *
- *
  * Structural Based Functional Test at Field (SBAF):
  * -------------------------------------------------
  *

From 4241a702e0d0c2ca9364cfac08dbf134264962de Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 3 Feb 2025 11:03:04 +0000
Subject: [PATCH 354/368] rxrpc: Fix the rxrpc_connection attend queue handling

The rxrpc_connection attend queue is never used because conn::attend_link
is never initialised and so is always NULL'd out and thus always appears to
be busy.  This requires the following fix:

 (1) Fix the attend queue problem by initialising conn::attend_link.

And, consequently, two further fixes for things masked by the above bug:

 (2) Fix rxrpc_input_conn_event() to handle being invoked with a NULL
     sk_buff pointer - something that can now happen with the above change.

 (3) Fix the RXRPC_SKB_MARK_SERVICE_CONN_SECURED message to carry a pointer
     to the connection and a ref on it.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Paolo Abeni <pabeni@redhat.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: netdev@vger.kernel.org
Fixes: f2cce89a074e ("rxrpc: Implement a mechanism to send an event notification to a connection")
Link: https://patch.msgid.link/20250203110307.7265-3-dhowells@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/trace/events/rxrpc.h |  1 +
 net/rxrpc/conn_event.c       | 17 ++++++++++-------
 net/rxrpc/conn_object.c      |  1 +
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 2f119d18a061f..cad50d91077ef 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -219,6 +219,7 @@
 	EM(rxrpc_conn_get_conn_input,		"GET inp-conn") \
 	EM(rxrpc_conn_get_idle,			"GET idle    ") \
 	EM(rxrpc_conn_get_poke_abort,		"GET pk-abort") \
+	EM(rxrpc_conn_get_poke_secured,		"GET secured ") \
 	EM(rxrpc_conn_get_poke_timer,		"GET poke    ") \
 	EM(rxrpc_conn_get_service_conn,		"GET svc-conn") \
 	EM(rxrpc_conn_new_client,		"NEW client  ") \
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 713e04394ceb7..74bb49b936cd4 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -272,6 +272,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 			 * we've already received the packet, put it on the
 			 * front of the queue.
 			 */
+			sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured);
 			skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
 			rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
 			skb_queue_head(&conn->local->rx_queue, skb);
@@ -437,14 +438,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
 	if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
 		rxrpc_abort_calls(conn);
 
-	switch (skb->mark) {
-	case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
-		if (conn->state != RXRPC_CONN_SERVICE)
-			break;
+	if (skb) {
+		switch (skb->mark) {
+		case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+			if (conn->state != RXRPC_CONN_SERVICE)
+				break;
 
-		for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
-			rxrpc_call_is_secure(conn->channels[loop].call);
-		break;
+			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+				rxrpc_call_is_secure(conn->channels[loop].call);
+			break;
+		}
 	}
 
 	/* Process delayed ACKs whose time has come. */
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 7eba4d7d9a380..2f1fd1e2e7e48 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
 		INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
 		INIT_LIST_HEAD(&conn->proc_link);
 		INIT_LIST_HEAD(&conn->link);
+		INIT_LIST_HEAD(&conn->attend_link);
 		mutex_init(&conn->security_lock);
 		mutex_init(&conn->tx_data_alloc_lock);
 		skb_queue_head_init(&conn->rx_queue);

From 028676bb189ed6d1b550a0fc570a9d695b6acfd3 Mon Sep 17 00:00:00 2001
From: Jacob Moroni <mail@jakemoroni.com>
Date: Mon, 3 Feb 2025 09:36:05 -0500
Subject: [PATCH 355/368] net: atlantic: fix warning during hot unplug

Firmware deinitialization performs MMIO accesses which are not
necessary if the device has already been removed. In some cases,
these accesses happen via readx_poll_timeout_atomic which ends up
timing out, resulting in a warning at hw_atl2_utils_fw.c:112:

[  104.595913] Call Trace:
[  104.595915]  <TASK>
[  104.595918]  ? show_regs+0x6c/0x80
[  104.595923]  ? __warn+0x8d/0x150
[  104.595925]  ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
[  104.595934]  ? report_bug+0x182/0x1b0
[  104.595938]  ? handle_bug+0x6e/0xb0
[  104.595940]  ? exc_invalid_op+0x18/0x80
[  104.595942]  ? asm_exc_invalid_op+0x1b/0x20
[  104.595944]  ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
[  104.595952]  ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
[  104.595959]  aq_nic_deinit.part.0+0xbd/0xf0 [atlantic]
[  104.595964]  aq_nic_deinit+0x17/0x30 [atlantic]
[  104.595970]  aq_ndev_close+0x2b/0x40 [atlantic]
[  104.595975]  __dev_close_many+0xad/0x160
[  104.595978]  dev_close_many+0x99/0x170
[  104.595979]  unregister_netdevice_many_notify+0x18b/0xb20
[  104.595981]  ? __call_rcu_common+0xcd/0x700
[  104.595984]  unregister_netdevice_queue+0xc6/0x110
[  104.595986]  unregister_netdev+0x1c/0x30
[  104.595988]  aq_pci_remove+0xb1/0xc0 [atlantic]

Fix this by skipping firmware deinitialization altogether if the
PCI device is no longer present.

Tested with an AQC113 attached via Thunderbolt by performing
repeated unplug cycles while traffic was running via iperf.

Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code")
Signed-off-by: Jacob Moroni <mail@jakemoroni.com>
Reviewed-by: Igor Russkikh <irusskikh@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250203143604.24930-3-mail@jakemoroni.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index fe0e3e2a81171..71e50fc65c147 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down)
 	aq_ptp_ring_free(self);
 	aq_ptp_free(self);
 
-	if (likely(self->aq_fw_ops->deinit) && link_down) {
+	/* May be invoked during hot unplug. */
+	if (pci_device_is_present(self->pdev) &&
+	    likely(self->aq_fw_ops->deinit) && link_down) {
 		mutex_lock(&self->fwreq_mutex);
 		self->aq_fw_ops->deinit(self->aq_hw);
 		mutex_unlock(&self->fwreq_mutex);

From a1300691aed9ee852b0a9192e29e2bdc2411a7e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 3 Feb 2025 17:08:38 +0000
Subject: [PATCH 356/368] net: rose: lock the socket in rose_bind()

syzbot reported a soft lockup in rose_loopback_timer(),
with a repro calling bind() from multiple threads.

rose_bind() must lock the socket to avoid this issue.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+7ff41b5215f0c534534e@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/67a0f78d.050a0220.d7c5a.00a0.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/20250203170838.3521361-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/rose/af_rose.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 72c65d938a150..a4a668b88a8f2 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct net_device *dev;
 	ax25_address *source;
 	ax25_uid_assoc *user;
+	int err = -EINVAL;
 	int n;
 
-	if (!sock_flag(sk, SOCK_ZAPPED))
-		return -EINVAL;
-
 	if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
 		return -EINVAL;
 
@@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
 		return -EINVAL;
 
-	if ((dev = rose_dev_get(&addr->srose_addr)) == NULL)
-		return -EADDRNOTAVAIL;
+	lock_sock(sk);
+
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		goto out_release;
+
+	err = -EADDRNOTAVAIL;
+	dev = rose_dev_get(&addr->srose_addr);
+	if (!dev)
+		goto out_release;
 
 	source = &addr->srose_call;
 
@@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	} else {
 		if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
 			dev_put(dev);
-			return -EACCES;
+			err = -EACCES;
+			goto out_release;
 		}
 		rose->source_call   = *source;
 	}
@@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	rose_insert_socket(sk);
 
 	sock_reset_flag(sk, SOCK_ZAPPED);
-
-	return 0;
+	err = 0;
+out_release:
+	release_sock(sk);
+	return err;
 }
 
 static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)

From 5368a67307b3b2c347dc8965ac55b888be665934 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Tue, 4 Feb 2025 23:19:53 +0100
Subject: [PATCH 357/368] selftests: mptcp: connect: -f: no reconnect

The '-f' parameter is there to force the kernel to emit MPTCP FASTCLOSE
by closing the connection with unread bytes in the receive queue.

The xdisconnect() helper was used to stop the connection, but it does
more than that: it will shut it down, then wait before reconnecting to
the same address. This causes the mptcp_join's "fastclose test" to fail
all the time.

This failure is due to a recent change, with commit 218cc166321f
("selftests: mptcp: avoid spurious errors on disconnect"), but that went
unnoticed because the test is currently ignored. The recent modification
only exposed an existing issue: xdisconnect() doesn't need to be used
here, only the shutdown() part is needed.

Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases")
Cc: stable@vger.kernel.org
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20250204-net-mptcp-sft-conn-f-v1-1-6b470c72fffa@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 414addef9a451..d240d02fa443a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1302,7 +1302,7 @@ int main_loop(void)
 		return ret;
 
 	if (cfg_truncate > 0) {
-		xdisconnect(fd);
+		shutdown(fd, SHUT_WR);
 	} else if (--cfg_repeat > 0) {
 		xdisconnect(fd);
 

From 647cef20e649c576dff271e018d5d15d998b629d Mon Sep 17 00:00:00 2001
From: Quang Le <quanglex97@gmail.com>
Date: Mon, 3 Feb 2025 16:58:38 -0800
Subject: [PATCH 358/368] pfifo_tail_enqueue: Drop new packet when sch->limit
 == 0

Expected behaviour:
When the scheduler's limit is reached, pfifo_tail_enqueue() drops a
packet from the scheduler's queue and decreases the scheduler's qlen by
one. Then, pfifo_tail_enqueue() enqueues the new packet and increases
the scheduler's qlen by one. Finally, pfifo_tail_enqueue() returns the
`NET_XMIT_CN` status code.

Weird behaviour:
If we set `sch->limit == 0` and trigger pfifo_tail_enqueue() on a
scheduler that has no packets, the 'drop a packet' step does nothing.
This means the scheduler's qlen is still 0.
Then, we go on to enqueue the new packet and increase the scheduler's
qlen by one. In summary, we can leverage pfifo_tail_enqueue() to increase
qlen by one and return the `NET_XMIT_CN` status code.

The problem is:
Let's say we have two qdiscs: Qdisc_A and Qdisc_B.
 - Qdisc_A's type must have a '->graft()' function to create a parent/child relationship.
   Let's say Qdisc_A's type is `hfsc`. Enqueuing a packet to this qdisc will trigger `hfsc_enqueue`.
 - Qdisc_B's type is pfifo_head_drop. Enqueuing a packet to this qdisc will trigger `pfifo_tail_enqueue`.
 - Qdisc_B is configured to have `sch->limit == 0`.
 - Qdisc_A is configured to route enqueued packets to Qdisc_B.

Enqueuing a packet through Qdisc_A will lead to:
 - hfsc_enqueue(Qdisc_A) -> pfifo_tail_enqueue(Qdisc_B)
 - Qdisc_B->q.qlen += 1
 - pfifo_tail_enqueue() returns `NET_XMIT_CN`
 - hfsc_enqueue() checks for `NET_XMIT_SUCCESS` and sees `NET_XMIT_CN` => hfsc_enqueue() doesn't increase the qlen of Qdisc_A.

The whole process leads to a situation where Qdisc_A->q.qlen == 0 and Qdisc_B->q.qlen == 1.
Replacing 'hfsc' with another type (for example: 'drr') still leads to the same problem.
This violates the design where a parent's qlen should equal the sum of its children's qlen.

Bug impact: This issue can be used for user->kernel privilege escalation when it is reachable.

Fixes: 57dbb2d83d10 ("sched: add head drop fifo queue")
Reported-by: Quang Le <quanglex97@gmail.com>
Signed-off-by: Quang Le <quanglex97@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Link: https://patch.msgid.link/20250204005841.223511-2-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_fifo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index b50b2c2cc09bc..e6bfd39ff3396 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 {
 	unsigned int prev_backlog;
 
+	if (unlikely(READ_ONCE(sch->limit) == 0))
+		return qdisc_drop(skb, sch, to_free);
+
 	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 

From 3fe5648d1df1798ce14b5464b2ea49f10cd9db31 Mon Sep 17 00:00:00 2001
From: Quang Le <quanglex97@gmail.com>
Date: Mon, 3 Feb 2025 16:58:39 -0800
Subject: [PATCH 359/368] selftests/tc-testing: Add a test case for
 pfifo_head_drop qdisc when limit==0

When limit == 0, pfifo_tail_enqueue() must drop the new packet and
increase the dropped packets count of the qdisc.

All test results:

1..16
ok 1 a519 - Add bfifo qdisc with system default parameters on egress
ok 2 585c - Add pfifo qdisc with system default parameters on egress
ok 3 a86e - Add bfifo qdisc with system default parameters on egress with handle of maximum value
ok 4 9ac8 - Add bfifo qdisc on egress with queue size of 3000 bytes
ok 5 f4e6 - Add pfifo qdisc on egress with queue size of 3000 packets
ok 6 b1b1 - Add bfifo qdisc with system default parameters on egress with invalid handle exceeding maximum value
ok 7 8d5e - Add bfifo qdisc on egress with unsupported argument
ok 8 7787 - Add pfifo qdisc on egress with unsupported argument
ok 9 c4b6 - Replace bfifo qdisc on egress with new queue size
ok 10 3df6 - Replace pfifo qdisc on egress with new queue size
ok 11 7a67 - Add bfifo qdisc on egress with queue size in invalid format
ok 12 1298 - Add duplicate bfifo qdisc on egress
ok 13 45a0 - Delete nonexistent bfifo qdisc
ok 14 972b - Add prio qdisc on egress with invalid format for handles
ok 15 4d39 - Delete bfifo qdisc twice
ok 16 d774 - Check pfifo_head_drop qdisc enqueue behaviour when limit == 0

Signed-off-by: Quang Le <quanglex97@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Link: https://patch.msgid.link/20250204005841.223511-3-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/fifo.json      | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
index ae3d286a32b2e..6f20d033670d4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
@@ -313,6 +313,29 @@
         "matchPattern": "qdisc bfifo 1: root",
         "matchCount": "0",
         "teardown": [
+	]
+    },
+    {
+        "id": "d774",
+        "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0",
+        "category": [
+            "qdisc",
+            "pfifo_head_drop"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0",
+            "$IP link set dev $DUMMY up || true"
+        ],
+        "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+        "matchPattern": "dropped 2",
+        "matchCount": "1",
+        "teardown": [
         ]
     }
 ]

From 638ba5089324796c2ee49af10427459c2de35f71 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 3 Feb 2025 16:58:40 -0800
Subject: [PATCH 360/368] netem: Update sch->q.qlen before
 qdisc_tree_reduce_backlog()

qdisc_tree_reduce_backlog() notifies the parent qdisc only if the child
qdisc becomes empty, therefore we need to update the qlen and backlog of
the child qdisc before calling it. Otherwise it would miss the
opportunity to call cops->qlen_notify(). In the case of DRR, this
resulted in a UAF, since DRR uses ->qlen_notify() to maintain its active
list.

Fixes: f8d4bc455047 ("net/sched: netem: account for backlog updates from child qdisc")
Cc: Martin Ottens <martin.ottens@fau.de>
Reported-by: Mingi Cho <mincho@theori.io>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Link: https://patch.msgid.link/20250204005841.223511-4-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_netem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 71ec9986ed37f..fdd79d3ccd8ce 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -749,9 +749,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 				if (err != NET_XMIT_SUCCESS) {
 					if (net_xmit_drop_count(err))
 						qdisc_qstats_drop(sch);
-					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
 					sch->qstats.backlog -= pkt_len;
 					sch->q.qlen--;
+					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
 				}
 				goto tfifo_dequeue;
 			}

From 91aadc16ee73cf958be6b0896da3caea49b7f414 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 3 Feb 2025 16:58:41 -0800
Subject: [PATCH 361/368] selftests/tc-testing: Add a test case for
 qdisc_tree_reduce_backlog()

Integrate the test case provided by Mingi Cho into TDC.

All test results:

1..4
ok 1 ca5e - Check class delete notification for ffff:
ok 2 e4b7 - Check class delete notification for root ffff:
ok 3 33a9 - Check ingress is not searchable on backlog update
ok 4 a4b9 - Test class qlen notification

Cc: Mingi Cho <mincho@theori.io>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Link: https://patch.msgid.link/20250204005841.223511-5-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/infra/qdiscs.json     | 34 ++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index d3dd65b05b5f1..9044ac0541672 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -94,5 +94,37 @@
             "$TC qdisc del dev $DUMMY ingress",
             "$IP addr del 10.10.10.10/24 dev $DUMMY"
         ]
-    }
+    },
+    {
+	"id": "a4b9",
+	"name": "Test class qlen notification",
+	"category": [
+	    "qdisc"
+	],
+	"plugins": {
+	    "requires": "nsPlugin"
+	},
+	"setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: drr",
+            "$TC filter add dev $DUMMY parent 1: basic classid 1:1",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 drr",
+            "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem",
+            "$TC qdisc add dev $DUMMY parent 2: handle 3: drr",
+            "$TC filter add dev $DUMMY parent 3: basic action drop",
+            "$TC class add dev $DUMMY parent 3: classid 3:1 drr",
+            "$TC class del dev $DUMMY classid 1:1",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 drr"
+        ],
+        "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc ls dev $DUMMY",
+        "matchPattern": "drr 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY root handle 1: drr",
+            "$IP addr del 10.10.10.10/24 dev $DUMMY"
+        ]
+   }
 ]

From a70c7b3cbc0688016810bb2e0b9b8a0d6a530045 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 4 Feb 2025 11:10:06 -0500
Subject: [PATCH 362/368] tun: revert fix group permission check

This reverts commit 3ca459eaba1bf96a8c7878de84fa8872259a01e3.

The blamed commit caused a regression when neither tun->owner nor
tun->group is set. This is intended to be allowed, but now requires
CAP_NET_ADMIN.

Discussion in the referenced thread pointed out that the original
issue that prompted this patch can be resolved in userspace.

The relaxed access control may also make a device accessible when it
previously wasn't, while existing users may depend on it to not be.

This is a clean pure git revert, except for fixing the indentation on
the gid_valid line that checkpatch correctly flagged.

Fixes: 3ca459eaba1b ("tun: fix group permission check")
Link: https://lore.kernel.org/netdev/CAFqZXNtkCBT4f+PwyVRmQGoT3p1eVa01fCG_aNtpt6dakXncUg@mail.gmail.com/
Signed-off-by: Willem de Bruijn <willemb@google.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Stas Sergeev <stsp2@yandex.ru>
Link: https://patch.msgid.link/20250204161015.739430-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/tun.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 28624cca91f8d..acf96f2624887 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
 	return ret;
 }
 
-static inline bool tun_capable(struct tun_struct *tun)
+static inline bool tun_not_capable(struct tun_struct *tun)
 {
 	const struct cred *cred = current_cred();
 	struct net *net = dev_net(tun->dev);
 
-	if (ns_capable(net->user_ns, CAP_NET_ADMIN))
-		return 1;
-	if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner))
-		return 1;
-	if (gid_valid(tun->group) && in_egroup_p(tun->group))
-		return 1;
-	return 0;
+	return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
+		(gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
+		!ns_capable(net->user_ns, CAP_NET_ADMIN);
 }
 
 static void tun_set_real_num_queues(struct tun_struct *tun)
@@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		    !!(tun->flags & IFF_MULTI_QUEUE))
 			return -EINVAL;
 
-		if (!tun_capable(tun))
+		if (tun_not_capable(tun))
 			return -EPERM;
 		err = security_tun_dev_open(tun->security);
 		if (err < 0)

From 811b8f534fd85e17077bd2ac0413bcd16cc8fb9b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 4 Feb 2025 14:38:39 +0200
Subject: [PATCH 363/368] net: sched: Fix truncation of offloaded action
 statistics

In case of tc offload, when user space queries the kernel for tc action
statistics, tc will query the offloaded statistics from device drivers.
Among other statistics, drivers are expected to pass the number of
packets that hit the action since the last query as a 64-bit number.

Unfortunately, tc treats the number of packets as a 32-bit number,
leading to truncation and incorrect statistics when the number of
packets since the last query exceeds 0xffffffff:

$ tc -s filter show dev swp2 ingress
filter protocol all pref 1 flower chain 0
filter protocol all pref 1 flower chain 0 handle 0x1
  skip_sw
  in_hw in_hw_count 1
        action order 1: mirred (Egress Redirect to device swp1) stolen
        index 1 ref 1 bind 1 installed 58 sec used 0 sec
        Action statistics:
        Sent 1133877034176 bytes 536959475 pkt (dropped 0, overlimits 0 requeues 0)
[...]

According to the above, 2111-byte packets were redirected which is
impossible as only 64-byte packets were transmitted and the MTU was
1500.

Fix by treating packets as a 64-bit number:

$ tc -s filter show dev swp2 ingress
filter protocol all pref 1 flower chain 0
filter protocol all pref 1 flower chain 0 handle 0x1
  skip_sw
  in_hw in_hw_count 1
        action order 1: mirred (Egress Redirect to device swp1) stolen
        index 1 ref 1 bind 1 installed 61 sec used 0 sec
        Action statistics:
        Sent 1370624380864 bytes 21416005951 pkt (dropped 0, overlimits 0 requeues 0)
[...]

Which shows that only 64-byte packets were redirected (1370624380864 /
21416005951 = 64).

Fixes: 380407023526 ("net/sched: Enable netdev drivers to update statistics of offloaded actions")
Reported-by: Joe Botha <joe@atomic.ac>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250204123839.1151804-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d635c5b47ebaf..d48c657191cd0 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 }
 
 static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
-				  __u64 bytes, __u32 packets)
+				  __u64 bytes, __u64 packets)
 {
 	u64_stats_update_begin(&bstats->syncp);
 	u64_stats_add(&bstats->bytes, bytes);

From 41b996ce83bf944de5569d6263c8dbd5513e7ed0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Feb 2025 23:05:53 +0000
Subject: [PATCH 364/368] rxrpc: Fix call state set to not include the
 SERVER_SECURING state

The RXRPC_CALL_SERVER_SECURING state doesn't really belong with the other
states in the call's state set as the other states govern the call's Rx/Tx
phase transition and govern when packets can and can't be received or
transmitted.  The "Securing" state doesn't actually govern the reception of
packets and would need to be split depending on whether or not we've
received the last packet yet (to mirror RECV_REQUEST/ACK_REQUEST).

The "Securing" state is more about whether or not we can start forwarding
packets to the application as recvmsg will need to decode them and the
decoding can't take place until the challenge/response exchange has
completed.

Fix this by removing the RXRPC_CALL_SERVER_SECURING state from the state
set and, instead, using a flag, RXRPC_CALL_CONN_CHALLENGING, to track
whether or not we can queue the call for reception by recvmsg() or notify
the kernel app that data is ready.  In the event that we've already
received all the packets, the connection event handler will poke the app
layer in the appropriate manner.

Also there's a race whereby the app layer sees the last packet before rxrpc
has managed to end the rx phase and change the state to one amenable to
allowing a reply.  Fix this by queuing the packet after calling
rxrpc_end_rx_phase().

Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/20250204230558.712536-2-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/rxrpc/ar-internal.h | 2 +-
 net/rxrpc/call_object.c | 6 ++----
 net/rxrpc/conn_event.c  | 4 +---
 net/rxrpc/input.c       | 2 +-
 net/rxrpc/sendmsg.c     | 2 +-
 5 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 718193df9d2e2..f251845fe532c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -582,6 +582,7 @@ enum rxrpc_call_flag {
 	RXRPC_CALL_EXCLUSIVE,		/* The call uses a once-only connection */
 	RXRPC_CALL_RX_IS_IDLE,		/* recvmsg() is idle - send an ACK */
 	RXRPC_CALL_RECVMSG_READ_ALL,	/* recvmsg() read all of the received data */
+	RXRPC_CALL_CONN_CHALLENGING,	/* The connection is being challenged */
 };
 
 /*
@@ -602,7 +603,6 @@ enum rxrpc_call_state {
 	RXRPC_CALL_CLIENT_AWAIT_REPLY,	/* - client awaiting reply */
 	RXRPC_CALL_CLIENT_RECV_REPLY,	/* - client receiving reply phase */
 	RXRPC_CALL_SERVER_PREALLOC,	/* - service preallocation */
-	RXRPC_CALL_SERVER_SECURING,	/* - server securing request connection */
 	RXRPC_CALL_SERVER_RECV_REQUEST,	/* - server receiving request */
 	RXRPC_CALL_SERVER_ACK_REQUEST,	/* - server pending ACK of request */
 	RXRPC_CALL_SERVER_SEND_REPLY,	/* - server sending reply */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 5a543c3f6fb08..c4c8b46a68c67 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
 	[RXRPC_CALL_CLIENT_AWAIT_REPLY]		= "ClAwtRpl",
 	[RXRPC_CALL_CLIENT_RECV_REPLY]		= "ClRcvRpl",
 	[RXRPC_CALL_SERVER_PREALLOC]		= "SvPrealc",
-	[RXRPC_CALL_SERVER_SECURING]		= "SvSecure",
 	[RXRPC_CALL_SERVER_RECV_REQUEST]	= "SvRcvReq",
 	[RXRPC_CALL_SERVER_ACK_REQUEST]		= "SvAckReq",
 	[RXRPC_CALL_SERVER_SEND_REPLY]		= "SvSndRpl",
@@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 	call->cong_tstamp	= skb->tstamp;
 
 	__set_bit(RXRPC_CALL_EXPOSED, &call->flags);
-	rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+	rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
 
 	spin_lock(&conn->state_lock);
 
 	switch (conn->state) {
 	case RXRPC_CONN_SERVICE_UNSECURED:
 	case RXRPC_CONN_SERVICE_CHALLENGING:
-		rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+		__set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags);
 		break;
 	case RXRPC_CONN_SERVICE:
-		rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
 		break;
 
 	case RXRPC_CONN_ABORTED:
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 74bb49b936cd4..4d9c5e21ba785 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
  */
 static void rxrpc_call_is_secure(struct rxrpc_call *call)
 {
-	if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
-		rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+	if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags))
 		rxrpc_notify_socket(call);
-	}
 }
 
 /*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 4974b5accafa3..4a152f3c831fd 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -657,7 +657,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
 		rxrpc_propose_delay_ACK(call, sp->hdr.serial,
 					rxrpc_propose_ack_input_data);
 	}
-	if (notify) {
+	if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) {
 		trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
 		rxrpc_notify_socket(call);
 	}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 0e8da909d4f2f..584397aba4a07 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -707,7 +707,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	} else {
 		switch (rxrpc_call_state(call)) {
 		case RXRPC_CALL_CLIENT_AWAIT_CONN:
-		case RXRPC_CALL_SERVER_SECURING:
+		case RXRPC_CALL_SERVER_RECV_REQUEST:
 			if (p.command == RXRPC_CMD_SEND_ABORT)
 				break;
 			fallthrough;

From 2d7b30aef34dae942e9ab7812b288ce14658ae66 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Feb 2025 23:05:54 +0000
Subject: [PATCH 365/368] rxrpc: Fix race in call state changing vs recvmsg()

There's a race between the rxrpc I/O thread recording the end of the
receive phase of a call and recvmsg() examining the state of the call to
determine whether it has completed.

The problem is that call->_state records the I/O thread's view of the call,
not the application's view (which may lag), so that alone is not
sufficient.  To this end, the application also checks whether there is
anything left in call->recvmsg_queue for it to pick up.  The call must be
in state RXRPC_CALL_COMPLETE and the recvmsg_queue empty for the call to be
considered fully complete.

In rxrpc_input_queue_data(), the latest skbuff is added to the queue and
then, if it was marked as LAST_PACKET, the state is advanced...  But this
is two separate operations with no locking around them.

As a consequence, the lack of locking means that sendmsg() can jump into
the gap on a service call and attempt to send the reply - but then get
rejected because the I/O thread hasn't advanced the state yet.

Simply flipping the order in which things are done isn't an option as that
impacts the client side, causing the checks in rxrpc_kernel_check_life() as
to whether the call is still alive to race instead.

Fix this by moving the update of call->_state inside the skb queue
spinlocked section where the packet is queued on the I/O thread side.

rxrpc's recvmsg() will then automatically sync against this because it has
to take the call->recvmsg_queue spinlock in order to dequeue the last
packet.

rxrpc's sendmsg() doesn't need amending as the app shouldn't be calling it
to send a reply until recvmsg() indicates it has returned all of the
request.

Fixes: 93368b6bd58a ("rxrpc: Move call state changes from recvmsg to I/O thread")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/20250204230558.712536-3-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/rxrpc/input.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 4a152f3c831fd..9047ba13bd31e 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
 
-	skb_queue_tail(&call->recvmsg_queue, skb);
+	spin_lock_irq(&call->recvmsg_queue.lock);
+
+	__skb_queue_tail(&call->recvmsg_queue, skb);
 	rxrpc_input_update_ack_window(call, window, wtop);
 	trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
 	if (last)
+		/* Change the state inside the lock so that recvmsg syncs
+		 * correctly with it and using sendmsg() to send a reply
+		 * doesn't race.
+		 */
 		rxrpc_end_rx_phase(call, sp->hdr.serial);
+
+	spin_unlock_irq(&call->recvmsg_queue.lock);
 }
 
 /*

From 1e3835a8aea5118d58ff9daa656395e69c8806b2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 4 Feb 2025 13:57:29 -0800
Subject: [PATCH 366/368] MAINTAINERS: add entry for ethtool

Michal did an amazing job converting ethtool to Netlink, but never
added an entry to MAINTAINERS for himself. Create a formal entry
so that we can delegate (portions of) this code to folks.

Over the last 3 years the majority of the reviews have been done by
Andrew and me. I suppose Michal didn't want to be on the receiving
end of the flood of patches.

Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20250204215729.168992-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 MAINTAINERS | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 74b09dad46626..20c8daf3ce620 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16455,6 +16455,16 @@ F:	include/net/dsa.h
 F:	net/dsa/
 F:	tools/testing/selftests/drivers/net/dsa/
 
+NETWORKING [ETHTOOL]
+M:	Andrew Lunn <andrew@lunn.ch>
+M:	Jakub Kicinski <kuba@kernel.org>
+F:	Documentation/netlink/specs/ethtool.yaml
+F:	Documentation/networking/ethtool-netlink.rst
+F:	include/linux/ethtool*
+F:	include/uapi/linux/ethtool*
+F:	net/ethtool/
+F:	tools/testing/selftests/drivers/net/*/ethtool*
+
 NETWORKING [GENERAL]
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Eric Dumazet <edumazet@google.com>

From 82b02a7c459922bbf80e45d5f7e2c4cfef617943 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 4 Feb 2025 13:57:50 -0800
Subject: [PATCH 367/368] MAINTAINERS: add a sample ethtool section entry

I feel like we don't do a good enough job keeping authors of driver
APIs around. The ethtool code base was very nicely compartmentalized
by Michal. Establish a precedent of creating MAINTAINERS entries
for "sections" of the ethtool API. Use Andrew and cable test as
a sample entry. The entry should ideally cover 3 elements:
a core file, test(s), and keywords. The last one is important
because we intend the entries to cover core code *and* reviews
of drivers implementing given API!

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250204215750.169249-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 20c8daf3ce620..bd705e9123a3a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16465,6 +16465,12 @@ F:	include/uapi/linux/ethtool*
 F:	net/ethtool/
 F:	tools/testing/selftests/drivers/net/*/ethtool*
 
+NETWORKING [ETHTOOL CABLE TEST]
+M:	Andrew Lunn <andrew@lunn.ch>
+F:	net/ethtool/cabletest.c
+F:	tools/testing/selftests/drivers/net/*/ethtool*
+K:	cable_test
+
 NETWORKING [GENERAL]
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Eric Dumazet <edumazet@google.com>

From 2a64c96356c87aa8af826605943e5524bf45e24d Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 5 Feb 2025 12:57:47 +0000
Subject: [PATCH 368/368] Revert "net: stmmac: Specify hardware capability
 value when FIFO size isn't specified"

This reverts commit 8865d22656b4, which caused breakage for platforms
which are not using xgmac2 or gmac4. Only these two cores have the
capability of providing the FIFO sizes from hardware capability fields
(which are provided in priv->dma_cap.[tr]x_fifo_size.)

All other cores cannot, which results in these two fields containing
zero. We also have platforms that do not provide a value in
priv->plat->[tr]x_fifo_size, resulting in these also being zero.

This causes the new tests introduced by the reverted commit to fail,
and produce e.g.:

	stmmaceth f0804000.eth: Can't specify Rx FIFO size

An example of such a platform which fails is QEMU's npcm750-evb.
This uses dwmac1000 which, as noted above, does not have the capability
to provide the FIFO sizes from hardware.

Therefore, revert the commit to maintain compatibility with the way
the driver used to work.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/4e98f967-f636-46fb-9eca-d383b9495b86@roeck-us.net
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Steven Price <steven.price@arm.com>
Fixes: 8865d22656b4 ("net: stmmac: Specify hardware capability value when FIFO size isn't specified")
Link: https://patch.msgid.link/E1tfeyR-003YGJ-Gb@rmk-PC.armlinux.org.uk
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 35 +++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d04543e5697b0..b34ebb916b898 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2424,6 +2424,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 	u32 chan = 0;
 	u8 qmode = 0;
 
+	if (rxfifosz == 0)
+		rxfifosz = priv->dma_cap.rx_fifo_size;
+	if (txfifosz == 0)
+		txfifosz = priv->dma_cap.tx_fifo_size;
+
 	/* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */
 	if (priv->plat->has_gmac4 || priv->plat->has_xgmac) {
 		rxfifosz /= rx_channels_count;
@@ -2892,6 +2897,11 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
 	int rxfifosz = priv->plat->rx_fifo_size;
 	int txfifosz = priv->plat->tx_fifo_size;
 
+	if (rxfifosz == 0)
+		rxfifosz = priv->dma_cap.rx_fifo_size;
+	if (txfifosz == 0)
+		txfifosz = priv->dma_cap.tx_fifo_size;
+
 	/* Adjust for real per queue fifo size */
 	rxfifosz /= rx_channels_count;
 	txfifosz /= tx_channels_count;
@@ -5868,6 +5878,9 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
 	const int mtu = new_mtu;
 	int ret;
 
+	if (txfifosz == 0)
+		txfifosz = priv->dma_cap.tx_fifo_size;
+
 	txfifosz /= priv->plat->tx_queues_to_use;
 
 	if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) {
@@ -7219,29 +7232,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 		priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues;
 	}
 
-	if (!priv->plat->rx_fifo_size) {
-		if (priv->dma_cap.rx_fifo_size) {
-			priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size;
-		} else {
-			dev_err(priv->device, "Can't specify Rx FIFO size\n");
-			return -ENODEV;
-		}
-	} else if (priv->dma_cap.rx_fifo_size &&
-		   priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) {
+	if (priv->dma_cap.rx_fifo_size &&
+	    priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) {
 		dev_warn(priv->device,
 			 "Rx FIFO size (%u) exceeds dma capability\n",
 			 priv->plat->rx_fifo_size);
 		priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size;
 	}
-	if (!priv->plat->tx_fifo_size) {
-		if (priv->dma_cap.tx_fifo_size) {
-			priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size;
-		} else {
-			dev_err(priv->device, "Can't specify Tx FIFO size\n");
-			return -ENODEV;
-		}
-	} else if (priv->dma_cap.tx_fifo_size &&
-		   priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) {
+	if (priv->dma_cap.tx_fifo_size &&
+	    priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) {
 		dev_warn(priv->device,
 			 "Tx FIFO size (%u) exceeds dma capability\n",
 			 priv->plat->tx_fifo_size);