From ea678ac627e01daf5b4f1da24bf1d0c500e10898 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:00 +0530
Subject: [PATCH 001/251] powerpc64/ftrace: Add a field in paca to disable
 ftrace in unsafe code paths

We have some C code that we call into from real mode where we cannot
take any exceptions. Though the C functions themselves are mostly safe,
if these functions are traced, there is a possibility that we may take
an exception. For instance, in certain conditions, the ftrace code uses
WARN(), which uses a 'trap' to do its job.

For such scenarios, introduce a new field in paca 'ftrace_enabled',
which is checked on ftrace entry before continuing. This field can then
be set to zero to disable/pause ftrace, and set to a non-zero value to
resume ftrace.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h                |  1 +
 arch/powerpc/kernel/asm-offsets.c              |  1 +
 arch/powerpc/kernel/setup_64.c                 |  3 +++
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 14 ++++++++++++++
 arch/powerpc/kernel/trace/ftrace_64_pg.S       |  4 ++++
 5 files changed, 23 insertions(+)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 4185f1c961250..163f13f31255c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -223,6 +223,7 @@ struct paca_struct {
 	u8 hmi_event_available;		/* HMI event is available */
 	u8 hmi_p9_special_emu;		/* HMI P9 special emulation */
 #endif
+	u8 ftrace_enabled;		/* Hard disable ftrace */
 
 	/* Stuff for accurate time accounting */
 	struct cpu_accounting_data accounting;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6bee65f3cfd34..262c44a90ea1f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -180,6 +180,7 @@ int main(void)
 	OFFSET(PACAKMSR, paca_struct, kernel_msr);
 	OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
 	OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
+	OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
 #ifdef CONFIG_PPC_BOOK3S
 	OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
 #ifdef CONFIG_PPC_MM_SLICES
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b78f142a41488..313136006d1c3 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -252,6 +252,9 @@ static void cpu_ready_for_interrupts(void)
 
 	/* Set IR and DR in PACA MSR */
 	get_paca()->kernel_msr = MSR_KERNEL;
+
+	/* We are now ok to enable ftrace */
+	get_paca()->ftrace_enabled = 1;
 }
 
 unsigned long spr_default_dscr = 0;
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 3f3e818524227..ae1cbe783ab6b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -47,6 +47,12 @@ _GLOBAL(ftrace_caller)
 	/* Save all gprs to pt_regs */
 	SAVE_GPR(0, r1)
 	SAVE_10GPRS(2, r1)
+
+	/* Ok to continue? */
+	lbz	r3, PACA_FTRACE_ENABLED(r13)
+	cmpdi	r3, 0
+	beq	ftrace_no_trace
+
 	SAVE_10GPRS(12, r1)
 	SAVE_10GPRS(22, r1)
 
@@ -168,6 +174,14 @@ _GLOBAL(ftrace_graph_stub)
 _GLOBAL(ftrace_stub)
 	blr
 
+ftrace_no_trace:
+	mflr	r3
+	mtctr	r3
+	REST_GPR(3, r1)
+	addi	r1, r1, SWITCH_FRAME_SIZE
+	mtlr	r0
+	bctr
+
 #ifdef CONFIG_LIVEPATCH
 	/*
 	 * This function runs in the mcount context, between two functions. As
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index f095358da96e0..b7ba51a0f3b60 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -16,6 +16,10 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL_TOC(ftrace_caller)
+	lbz	r3, PACA_FTRACE_ENABLED(r13)
+	cmpdi	r3, 0
+	beqlr
+
 	/* Taken from output of objdump from lib64/glibc */
 	mflr	r3
 	ld	r11, 0(r1)

From c3e59d7784035ae975eee23c83d05e69237a2ccc Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:01 +0530
Subject: [PATCH 002/251] powerpc64/ftrace: Rearrange #ifdef sections in
 ftrace.h

Re-arrange the last #ifdef section in preparation for a subsequent
change.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ftrace.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 9abddde372abf..ebfc0b846bb5d 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -68,8 +68,8 @@ struct dyn_arch_ftrace {
 #endif
 #endif
 
-#if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__)
-#ifdef PPC64_ELF_ABI_v1
+#ifndef __ASSEMBLY__
+#if defined(CONFIG_FTRACE_SYSCALLS) && defined(PPC64_ELF_ABI_v1)
 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
 {
@@ -81,7 +81,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 	 */
 	return !strcmp(sym + 4, name + 3);
 }
-#endif
-#endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */
+#endif /* CONFIG_FTRACE_SYSCALLS && PPC64_ELF_ABI_v1 */
+#endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_FTRACE */

From acd55b1005768266e899b0da51babcb57e75e846 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:02 +0530
Subject: [PATCH 003/251] powerpc64/ftrace: Add helpers to hard disable ftrace

Add some helpers to enable/disable ftrace through paca->ftrace_enabled.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ftrace.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index ebfc0b846bb5d..3b5e85a72e10c 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -82,6 +82,23 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 	return !strcmp(sym + 4, name + 3);
 }
 #endif /* CONFIG_FTRACE_SYSCALLS && PPC64_ELF_ABI_v1 */
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
+static inline void this_cpu_disable_ftrace(void)
+{
+	get_paca()->ftrace_enabled = 0;
+}
+
+static inline void this_cpu_enable_ftrace(void)
+{
+	get_paca()->ftrace_enabled = 1;
+}
+#else /* CONFIG_PPC64 */
+static inline void this_cpu_disable_ftrace(void) { }
+static inline void this_cpu_enable_ftrace(void) { }
+#endif /* CONFIG_PPC64 */
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_FTRACE */

From d103978636c27fce216bbc8bb289981047b71bd4 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:03 +0530
Subject: [PATCH 004/251] powerpc64/ftrace: Delay enabling ftrace on secondary
 cpus

On the boot cpu, though we enable paca->ftrace_enabled in early_setup()
(via cpu_ready_for_interrupts()), we don't start tracing until much
later since ftrace is not initialized yet and since we only support
DYNAMIC_FTRACE on powerpc. However, it is possible that ftrace has been
initialized by the time some of the secondary cpus start up. In this
case, we will try to trace some of the early boot code which can cause
problems.

To address this, move setting paca->ftrace_enabled from
cpu_ready_for_interrupts() to early_setup() for the boot cpu, and towards
the end of start_secondary() for secondary cpus.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 10 +++++++---
 arch/powerpc/kernel/smp.c      |  4 ++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 313136006d1c3..7a7ce8ad455e1 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -252,9 +252,6 @@ static void cpu_ready_for_interrupts(void)
 
 	/* Set IR and DR in PACA MSR */
 	get_paca()->kernel_msr = MSR_KERNEL;
-
-	/* We are now ok to enable ftrace */
-	get_paca()->ftrace_enabled = 1;
 }
 
 unsigned long spr_default_dscr = 0;
@@ -349,6 +346,13 @@ void __init early_setup(unsigned long dt_ptr)
 	 */
 	cpu_ready_for_interrupts();
 
+	/*
+	 * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it
+	 * will only actually get enabled on the boot cpu much later once
+	 * ftrace itself has been initialized.
+	 */
+	this_cpu_enable_ftrace();
+
 	DBG(" <- early_setup()\n");
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9ca7148b5881a..9e711cdbe3847 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,6 +59,7 @@
 #include <asm/kexec.h>
 #include <asm/asm-prototypes.h>
 #include <asm/cpu_has_feature.h>
+#include <asm/ftrace.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
@@ -1066,6 +1067,9 @@ void start_secondary(void *unused)
 
 	local_irq_enable();
 
+	/* We can enable ftrace for secondary cpus now */
+	this_cpu_enable_ftrace();
+
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
 	BUG();

From 424ef0160f439feb2a1a6e796a281e2bfa7b6997 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:04 +0530
Subject: [PATCH 005/251] powerpc64/ftrace: Disable ftrace during hotplug

Disable ftrace when a cpu is about to go offline. When the cpu is woken
up, ftrace will get enabled in start_secondary().

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/smp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9e711cdbe3847..c96f8fbc19423 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1166,6 +1166,8 @@ int __cpu_disable(void)
 	if (!smp_ops->cpu_disable)
 		return -ENOSYS;
 
+	this_cpu_disable_ftrace();
+
 	err = smp_ops->cpu_disable();
 	if (err)
 		return err;
@@ -1184,6 +1186,12 @@ void __cpu_die(unsigned int cpu)
 
 void cpu_die(void)
 {
+	/*
+	 * Disable on the down path. This will be re-enabled by
+	 * start_secondary() via start_secondary_resume() below
+	 */
+	this_cpu_disable_ftrace();
+
 	if (ppc_md.cpu_die)
 		ppc_md.cpu_die();
 

From a4bc64d305af40446d0ac792bb8a9b2cd20af3d3 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:05 +0530
Subject: [PATCH 006/251] powerpc64/ftrace: Disable ftrace during kvm
 entry/exit

During guest entry/exit, we switch over to/from the guest MMU context
and we cannot take exceptions in the hypervisor code.

Since ftrace may be enabled and since it can result in us taking a trap,
disable ftrace by setting paca->ftrace_enabled to zero. There are two
paths through which we enter/exit a guest:
1. If we are the vcore runner, then we enter the guest via
__kvmppc_vcore_entry() and we disable ftrace around this. This is always
the case for Power9, and for the primary thread on Power8.
2. If we are a secondary thread in Power8, then we would be in nap due
to SMT being disabled. We are woken up by an IPI to enter the guest. In
this scenario, we enter the guest through kvm_start_guest(). We disable
ftrace at this point. In this scenario, ftrace would only get re-enabled
on the secondary thread when SMT is re-enabled (via start_secondary()).

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kvm/book3s_hv.c            | 4 ++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d07fca5121c5..f604cbd8fc346 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2911,8 +2911,12 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
+	this_cpu_disable_ftrace();
+
 	trap = __kvmppc_vcore_entry();
 
+	this_cpu_enable_ftrace();
+
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
 	trace_hardirqs_off();
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bd63fa8a08b5d..2c3cbe0067b24 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -342,6 +342,9 @@ kvm_start_guest:
 
 	ld	r2,PACATOC(r13)
 
+	li	r0,0
+	stb	r0,PACA_FTRACE_ENABLED(r13)
+
 	li	r0,KVM_HWTHREAD_IN_KVM
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
 

From 88b1a8547f4c00e55d54e081fc15e3980debf5c1 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:06 +0530
Subject: [PATCH 007/251] powerpc64/kexec: Hard disable ftrace before switching
 to the new kernel

If function_graph tracer is enabled during kexec, we see the below
exception in the simulator:
	root@(none):/# kexec -e
	kvm: exiting hardware virtualization
	kexec_core: Starting new kernel
	[   19.262020070,5] OPAL: Switch to big-endian OS
	kexec: Starting switchover sequence.
	Interrupt to 0xC000000000004380 from 0xC000000000004380
	** Execution stopped: Continuous Interrupt, Instruction caused exception,  **

Now that we have a more effective way to completely disable ftrace on
ppc64, let's also use that before switching to a new kernel during
kexec.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/machine_kexec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 2694d078741d0..936c7e2d421e7 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -98,12 +98,14 @@ void machine_kexec(struct kimage *image)
 	int save_ftrace_enabled;
 
 	save_ftrace_enabled = __ftrace_enabled_save();
+	this_cpu_disable_ftrace();
 
 	if (ppc_md.machine_kexec)
 		ppc_md.machine_kexec(image);
 	else
 		default_machine_kexec(image);
 
+	this_cpu_enable_ftrace();
 	__ftrace_enabled_restore(save_ftrace_enabled);
 
 	/* Fall back to normal restart if we're still alive. */

From 250122baed29d90c643be8809d75274336b98fb0 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:07 +0530
Subject: [PATCH 008/251] powerpc64/module: Tighten detection of mcount call
 sites with -mprofile-kernel

For R_PPC64_REL24 relocations, we suppress emitting instructions for TOC
load/restore in the relocation stub if the relocation is for _mcount()
call when using -mprofile-kernel ABI.

To detect this, we check if the preceding instructions are per the
standard set of instructions emitted by gcc: either the two instruction
sequence of 'mflr r0; std r0,16(r1)', or the more optimized variant of a
single 'mflr r0'. This is not sufficient since nothing prevents users
from hand coding sequences involving a 'mflr r0' followed by a 'bl'.

For removing the toc save instruction from the stub, we additionally
check if the symbol is "_mcount". Add the same check here as well.

Also rename is_early_mcount_callsite() to is_mprofile_mcount_callsite()
since that is what is being checked. The use of "early" is misleading
since there is nothing involving this function that qualifies as early.

Fixes: 153086644fd1f ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/module_64.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index a2636c250b7be..8413be31d6a4f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -463,8 +463,11 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
 }
 
 #ifdef CC_USING_MPROFILE_KERNEL
-static bool is_early_mcount_callsite(u32 *instruction)
+static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction)
 {
+	if (strcmp("_mcount", name))
+		return false;
+
 	/*
 	 * Check if this is one of the -mprofile-kernel sequences.
 	 */
@@ -496,8 +499,7 @@ static void squash_toc_save_inst(const char *name, unsigned long addr)
 #else
 static void squash_toc_save_inst(const char *name, unsigned long addr) { }
 
-/* without -mprofile-kernel, mcount calls are never early */
-static bool is_early_mcount_callsite(u32 *instruction)
+static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction)
 {
 	return false;
 }
@@ -505,11 +507,11 @@ static bool is_early_mcount_callsite(u32 *instruction)
 
 /* We expect a noop next: if it is, replace it with instruction to
    restore r2. */
-static int restore_r2(u32 *instruction, struct module *me)
+static int restore_r2(const char *name, u32 *instruction, struct module *me)
 {
 	u32 *prev_insn = instruction - 1;
 
-	if (is_early_mcount_callsite(prev_insn))
+	if (is_mprofile_mcount_callsite(name, prev_insn))
 		return 1;
 
 	/*
@@ -650,7 +652,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 				value = stub_for_addr(sechdrs, value, me);
 				if (!value)
 					return -ENOENT;
-				if (!restore_r2((u32 *)location + 1, me))
+				if (!restore_r2(strtab + sym->st_name,
+							(u32 *)location + 1, me))
 					return -ENOEXEC;
 
 				squash_toc_save_inst(strtab + sym->st_name, value);

From 9ef404236438bf4934386dc2aa34ba7f0a9e1934 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:08 +0530
Subject: [PATCH 009/251] powerpc64/ftrace: Use the generic version of
 ftrace_replace_code()

Our implementation matches that of the generic version, which also
handles FTRACE_UPDATE_MODIFY_CALL. So, remove our implementation in
favor of the generic version.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/trace/ftrace.c | 36 ------------------------------
 1 file changed, 36 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4741fe112f05f..80667128db3d2 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -485,42 +485,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ret;
 }
 
-static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
-{
-	unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
-	int ret;
-
-	ret = ftrace_update_record(rec, enable);
-
-	switch (ret) {
-	case FTRACE_UPDATE_IGNORE:
-		return 0;
-	case FTRACE_UPDATE_MAKE_CALL:
-		return ftrace_make_call(rec, ftrace_addr);
-	case FTRACE_UPDATE_MAKE_NOP:
-		return ftrace_make_nop(NULL, rec, ftrace_addr);
-	}
-
-	return 0;
-}
-
-void ftrace_replace_code(int enable)
-{
-	struct ftrace_rec_iter *iter;
-	struct dyn_ftrace *rec;
-	int ret;
-
-	for (iter = ftrace_rec_iter_start(); iter;
-	     iter = ftrace_rec_iter_next(iter)) {
-		rec = ftrace_rec_iter_record(iter);
-		ret = __ftrace_replace_code(rec, enable);
-		if (ret) {
-			ftrace_bug(ret, rec);
-			return;
-		}
-	}
-}
-
 /*
  * Use the default ftrace_modify_all_code, but without
  * stop_machine().

From ae30cc05bed2fd7eb05e4fb53f412783f05ccb7b Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 19 Apr 2018 12:34:09 +0530
Subject: [PATCH 010/251] powerpc64/ftrace: Implement support for
 ftrace_regs_caller()

With -mprofile-kernel, we always save the full register state in
ftrace_caller(). While this works, this is inefficient if we're not
interested in the register state, such as when we're using the function
tracer.

Rename the existing ftrace_caller() as ftrace_regs_caller() and provide
a simpler implementation for ftrace_caller() that is used when registers
are not required to be saved.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ftrace.h             |   2 -
 arch/powerpc/include/asm/module.h             |   3 +
 arch/powerpc/kernel/module_64.c               |  28 ++-
 arch/powerpc/kernel/trace/ftrace.c            | 184 ++++++++++++++++--
 .../powerpc/kernel/trace/ftrace_64_mprofile.S |  71 ++++++-
 5 files changed, 262 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 3b5e85a72e10c..f0806a2fd4518 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -49,8 +49,6 @@
 extern void _mcount(void);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-# define FTRACE_ADDR ((unsigned long)ftrace_caller)
-# define FTRACE_REGS_ADDR FTRACE_ADDR
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
        /* reloction of mcount call site is the same as the address */
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 4f6573934792b..18f7214d68b78 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -53,6 +53,9 @@ struct mod_arch_specific {
 #ifdef CONFIG_DYNAMIC_FTRACE
 	unsigned long toc;
 	unsigned long tramp;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	unsigned long tramp_regs;
+#endif
 #endif
 
 	/* For module function descriptor dereference */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 8413be31d6a4f..f7667e2ebfcb8 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -280,6 +280,10 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
 #ifdef CONFIG_DYNAMIC_FTRACE
 	/* make the trampoline to the ftrace_caller */
 	relocs++;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	/* an additional one for ftrace_regs_caller */
+	relocs++;
+#endif
 #endif
 
 	pr_debug("Looks like a total of %lu stubs, max\n", relocs);
@@ -765,7 +769,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
  * via the paca (in r13). The target (ftrace_caller()) is responsible for
  * saving and restoring the toc before returning.
  */
-static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me)
+static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
+				struct module *me, unsigned long addr)
 {
 	struct ppc64_stub_entry *entry;
 	unsigned int i, num_stubs;
@@ -792,9 +797,10 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module
 	memcpy(entry->jump, stub_insns, sizeof(stub_insns));
 
 	/* Stub uses address relative to kernel toc (from the paca) */
-	reladdr = (unsigned long)ftrace_caller - kernel_toc_addr();
+	reladdr = addr - kernel_toc_addr();
 	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
-		pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name);
+		pr_err("%s: Address of %ps out of range of kernel_toc.\n",
+							me->name, (void *)addr);
 		return 0;
 	}
 
@@ -802,22 +808,30 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module
 	entry->jump[2] |= PPC_LO(reladdr);
 
 	/* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */
-	entry->funcdata = func_desc((unsigned long)ftrace_caller);
+	entry->funcdata = func_desc(addr);
 	entry->magic = STUB_MAGIC;
 
 	return (unsigned long)entry;
 }
 #else
-static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me)
+static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
+				struct module *me, unsigned long addr)
 {
-	return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me);
+	return stub_for_addr(sechdrs, addr, me);
 }
 #endif
 
 int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
 {
 	mod->arch.toc = my_r2(sechdrs, mod);
-	mod->arch.tramp = create_ftrace_stub(sechdrs, mod);
+	mod->arch.tramp = create_ftrace_stub(sechdrs, mod,
+					(unsigned long)ftrace_caller);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod,
+					(unsigned long)ftrace_regs_caller);
+	if (!mod->arch.tramp_regs)
+		return -ENOENT;
+#endif
 
 	if (!mod->arch.tramp)
 		return -ENOENT;
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 80667128db3d2..79d2924e75d5f 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -357,6 +357,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int op[2];
 	void *ip = (void *)rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = rec->arch.mod;
 
 	/* read where this goes */
 	if (probe_kernel_read(op, ip, sizeof(op)))
@@ -368,19 +370,44 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 	}
 
-	/* If we never set up a trampoline to ftrace_caller, then bail */
-	if (!rec->arch.mod->arch.tramp) {
+	/* If we never set up ftrace trampoline(s), then bail */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+#else
+	if (!mod->arch.tramp) {
+#endif
 		pr_err("No ftrace trampoline\n");
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	if (rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+#endif
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
 	/* Ensure branch is within 24 bits */
-	if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+	if (!create_branch(ip, tramp, BRANCH_SET_LINK)) {
 		pr_err("Branch out of range\n");
 		return -EINVAL;
 	}
 
-	if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
 		pr_err("REL24 out of range!\n");
 		return -EINVAL;
 	}
@@ -388,14 +415,6 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	return 0;
 }
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
-			unsigned long addr)
-{
-	return ftrace_make_call(rec, addr);
-}
-#endif
-
 #else  /* !CONFIG_PPC64: */
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -472,6 +491,137 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 #endif /* CONFIG_MODULES */
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_MODULES
+static int
+__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+					unsigned long addr)
+{
+	unsigned int op;
+	unsigned long ip = rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = rec->arch.mod;
+
+	/* If we never set up ftrace trampolines, then bail */
+	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	/* read where this goes */
+	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %x\n", op);
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+	entry = ppc_global_function_entry((void *)old_addr);
+
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (tramp != entry) {
+		/* old_addr is not within range, so we must have used a trampoline */
+		if (module_trampoline_target(mod, tramp, &ptr)) {
+			pr_err("Failed to get trampoline target\n");
+			return -EFAULT;
+		}
+
+		pr_devel("trampoline target %lx", ptr);
+
+		/* This should match what was called */
+		if (ptr != entry) {
+			pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+			return -EINVAL;
+		}
+	}
+
+	/* The new target may be within range */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) {
+			pr_err("REL24 out of range!\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	if (rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	/* Ensure branch is within 24 bits */
+	if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("Branch out of range\n");
+		return -EINVAL;
+	}
+
+	if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#endif
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned int old, new;
+
+	/*
+	 * If the calling address is more that 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, old_addr, 1);
+		new = ftrace_call_replace(ip, addr, 1);
+		return ftrace_modify_code(ip, old, new);
+	}
+
+#ifdef CONFIG_MODULES
+	/*
+	 * Out of range jumps are called from modules.
+	 */
+	if (!rec->arch.mod) {
+		pr_err("No module loaded\n");
+		return -EINVAL;
+	}
+
+	return __ftrace_modify_call(rec, old_addr, addr);
+#else
+	/* We should not get here without modules */
+	return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+#endif
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
@@ -482,6 +632,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	new = ftrace_call_replace(ip, (unsigned long)func, 1);
 	ret = ftrace_modify_code(ip, old, new);
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	/* Also update the regs callback function */
+	if (!ret) {
+		ip = (unsigned long)(&ftrace_regs_call);
+		old = *(unsigned int *)&ftrace_regs_call;
+		new = ftrace_call_replace(ip, (unsigned long)func, 1);
+		ret = ftrace_modify_code(ip, old, new);
+	}
+#endif
+
 	return ret;
 }
 
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index ae1cbe783ab6b..ed9d7a46c3af2 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -20,8 +20,8 @@
 #ifdef CONFIG_DYNAMIC_FTRACE
 /*
  *
- * ftrace_caller() is the function that replaces _mcount() when ftrace is
- * active.
+ * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount()
+ * when ftrace is active.
  *
  * We arrive here after a function A calls function B, and we are the trace
  * function for B. When we enter r1 points to A's stack frame, B has not yet
@@ -37,7 +37,7 @@
  * Our job is to save the register state into a struct pt_regs (on the stack)
  * and then arrange for the ftrace function to be called.
  */
-_GLOBAL(ftrace_caller)
+_GLOBAL(ftrace_regs_caller)
 	/* Save the original return address in A's stack frame */
 	std	r0,LRSAVE(r1)
 
@@ -100,8 +100,8 @@ _GLOBAL(ftrace_caller)
 	addi    r6, r1 ,STACK_FRAME_OVERHEAD
 
 	/* ftrace_call(r3, r4, r5, r6) */
-.globl ftrace_call
-ftrace_call:
+.globl ftrace_regs_call
+ftrace_regs_call:
 	bl	ftrace_stub
 	nop
 
@@ -162,6 +162,7 @@ ftrace_call:
 	bne-	livepatch_handler
 #endif
 
+ftrace_caller_common:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
 ftrace_graph_call:
@@ -182,6 +183,66 @@ ftrace_no_trace:
 	mtlr	r0
 	bctr
 
+_GLOBAL(ftrace_caller)
+	/* Save the original return address in A's stack frame */
+	std	r0, LRSAVE(r1)
+
+	/* Create our stack frame + pt_regs */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+
+	/* Save all gprs to pt_regs */
+	SAVE_8GPRS(3, r1)
+
+	lbz	r3, PACA_FTRACE_ENABLED(r13)
+	cmpdi	r3, 0
+	beq	ftrace_no_trace
+
+	/* Get the _mcount() call site out of LR */
+	mflr	r7
+	std     r7, _NIP(r1)
+
+	/* Save callee's TOC in the ABI compliant location */
+	std	r2, 24(r1)
+	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
+
+	addis	r3, r2, function_trace_op@toc@ha
+	addi	r3, r3, function_trace_op@toc@l
+	ld	r5, 0(r3)
+
+	/* Calculate ip from nip-4 into r3 for call below */
+	subi    r3, r7, MCOUNT_INSN_SIZE
+
+	/* Put the original return address in r4 as parent_ip */
+	mr	r4, r0
+
+	/* Set pt_regs to NULL */
+	li	r6, 0
+
+	/* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+
+	ld	r3, _NIP(r1)
+	mtctr	r3
+
+	/* Restore gprs */
+	REST_8GPRS(3,r1)
+
+	/* Restore callee's TOC */
+	ld	r2, 24(r1)
+
+	/* Pop our stack frame */
+	addi	r1, r1, SWITCH_FRAME_SIZE
+
+	/* Reload original LR */
+	ld	r0, LRSAVE(r1)
+	mtlr	r0
+
+	/* Handle function_graph or go back */
+	b	ftrace_caller_common
+
 #ifdef CONFIG_LIVEPATCH
 	/*
 	 * This function runs in the mcount context, between two functions. As

From 0c0c52306f4792a41d8a86e7c5d30cd4f442e532 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 27 Mar 2018 15:29:06 +1100
Subject: [PATCH 011/251] powerpc: Only support DYNAMIC_FTRACE not static

We've had dynamic ftrace support for over 9 years since Steve first
wrote it, all the distros use dynamic, and static is basically
untested these days, so drop support for static ftrace.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/include/asm/ftrace.h             |  4 +--
 arch/powerpc/kernel/trace/ftrace.c            |  2 --
 arch/powerpc/kernel/trace/ftrace_32.S         | 20 -------------
 arch/powerpc/kernel/trace/ftrace_64.S         | 29 -------------------
 .../powerpc/kernel/trace/ftrace_64_mprofile.S |  3 --
 arch/powerpc/kernel/trace/ftrace_64_pg.S      |  2 --
 7 files changed, 2 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181a7cbbf..ebb90f09e74fa 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -162,6 +162,7 @@ config PPC
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
+	select DYNAMIC_FTRACE			if FUNCTION_TRACER
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
 	select GENERIC_ATOMIC64			if PPC32
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index f0806a2fd4518..fc3a2203c566e 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -48,7 +48,6 @@
 #else /* !__ASSEMBLY__ */
 extern void _mcount(void);
 
-#ifdef CONFIG_DYNAMIC_FTRACE
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
        /* reloction of mcount call site is the same as the address */
@@ -58,13 +57,12 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 struct dyn_arch_ftrace {
 	struct module *mod;
 };
-#endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #endif
-#endif
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #ifndef __ASSEMBLY__
 #if defined(CONFIG_FTRACE_SYSCALLS) && defined(PPC64_ELF_ABI_v1)
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 79d2924e75d5f..c076a32093fdd 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -662,7 +662,6 @@ int __init ftrace_dyn_arch_init(void)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-#ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_graph_call(void);
 extern void ftrace_graph_stub(void);
 
@@ -691,7 +690,6 @@ int ftrace_disable_ftrace_graph_caller(void)
 
 	return ftrace_modify_code(ip, old, new);
 }
-#endif /* CONFIG_DYNAMIC_FTRACE */
 
 /*
  * Hook the return address and push it in the stack of return addrs
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index afef2c0762822..2c29098f630f6 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -14,7 +14,6 @@
 #include <asm/ftrace.h>
 #include <asm/export.h>
 
-#ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
 	/*
@@ -47,26 +46,7 @@ _GLOBAL(ftrace_graph_stub)
 	MCOUNT_RESTORE_FRAME
 	/* old link register ends up in ctr reg */
 	bctr
-#else
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-
-	MCOUNT_SAVE_FRAME
 
-	subi	r3, r3, MCOUNT_INSN_SIZE
-	LOAD_REG_ADDR(r5, ftrace_trace_function)
-	lwz	r5,0(r5)
-
-	mtctr	r5
-	bctrl
-	nop
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	b	ftrace_graph_caller
-#endif
-	MCOUNT_RESTORE_FRAME
-	bctr
-#endif
 EXPORT_SYMBOL(_mcount)
 
 _GLOBAL(ftrace_stub)
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
index e5ccea19821e1..e25f77c10a72c 100644
--- a/arch/powerpc/kernel/trace/ftrace_64.S
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -14,7 +14,6 @@
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
 
-#ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
 EXPORT_SYMBOL(_mcount)
@@ -23,34 +22,6 @@ EXPORT_SYMBOL(_mcount)
 	mtlr	r0
 	bctr
 
-#else /* CONFIG_DYNAMIC_FTRACE */
-_GLOBAL_TOC(_mcount)
-EXPORT_SYMBOL(_mcount)
-	/* Taken from output of objdump from lib64/glibc */
-	mflr	r3
-	ld	r11, 0(r1)
-	stdu	r1, -112(r1)
-	std	r3, 128(r1)
-	ld	r4, 16(r11)
-
-	subi	r3, r3, MCOUNT_INSN_SIZE
-	LOAD_REG_ADDR(r5,ftrace_trace_function)
-	ld	r5,0(r5)
-	ld	r5,0(r5)
-	mtctr	r5
-	bctrl
-	nop
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	b	ftrace_graph_caller
-#endif
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
-_GLOBAL(ftrace_stub)
-	blr
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(return_to_handler)
 	/* need to save return values */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index ed9d7a46c3af2..9a5b5a5136040 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -17,7 +17,6 @@
 #include <asm/bug.h>
 #include <asm/ptrace.h>
 
-#ifdef CONFIG_DYNAMIC_FTRACE
 /*
  *
  * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount()
@@ -311,8 +310,6 @@ livepatch_handler:
 	blr
 #endif /* CONFIG_LIVEPATCH */
 
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)
 	stdu	r1, -112(r1)
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index b7ba51a0f3b60..4c515c4023de0 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -14,7 +14,6 @@
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
 
-#ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL_TOC(ftrace_caller)
 	lbz	r3, PACA_FTRACE_ENABLED(r13)
 	cmpdi	r3, 0
@@ -43,7 +42,6 @@ _GLOBAL(ftrace_graph_stub)
 
 _GLOBAL(ftrace_stub)
 	blr
-#endif /* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)

From 3bc6cf5a86e5e506250c03dfcdf971129d7dcf65 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 10 Dec 2017 22:29:13 -0500
Subject: [PATCH 012/251] powerpc: remove retired sbc834x support

I no longer have a functional version of this board for even the most
basic sanity boot testing, and they have not been available for purchase
for quite some years now.

There is no point in adding a burden to testing coverage that does
walk all the possible defconfigs, so with all the above in mind, it
makes sense to remove it.  Of course it will remain in the git history
for anyone who happens to stumble on one and wants to tinker with it.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/dts/sbc8349.dts           | 331 --------------------
 arch/powerpc/configs/83xx/sbc834x_defconfig |  74 -----
 arch/powerpc/platforms/83xx/Kconfig         |   7 -
 arch/powerpc/platforms/83xx/Makefile        |   1 -
 arch/powerpc/platforms/83xx/sbc834x.c       |  73 -----
 5 files changed, 486 deletions(-)
 delete mode 100644 arch/powerpc/boot/dts/sbc8349.dts
 delete mode 100644 arch/powerpc/configs/83xx/sbc834x_defconfig
 delete mode 100644 arch/powerpc/platforms/83xx/sbc834x.c

diff --git a/arch/powerpc/boot/dts/sbc8349.dts b/arch/powerpc/boot/dts/sbc8349.dts
deleted file mode 100644
index fc89e00b765cf..0000000000000
--- a/arch/powerpc/boot/dts/sbc8349.dts
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * SBC8349E Device Tree Source
- *
- * Copyright 2007 Wind River Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- *	-based largely on the Freescale MPC834x_MDS dts.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
-	model = "SBC8349E";
-	compatible = "SBC834xE";
-	#address-cells = <1>;
-	#size-cells = <1>;
-
-	aliases {
-		ethernet0 = &enet0;
-		ethernet1 = &enet1;
-		serial0 = &serial0;
-		serial1 = &serial1;
-		pci0 = &pci0;
-	};
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,8349@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			d-cache-line-size = <32>;
-			i-cache-line-size = <32>;
-			d-cache-size = <32768>;
-			i-cache-size = <32768>;
-			timebase-frequency = <0>;	// from bootloader
-			bus-frequency = <0>;		// from bootloader
-			clock-frequency = <0>;		// from bootloader
-		};
-	};
-
-	memory {
-		device_type = "memory";
-		reg = <0x00000000 0x10000000>;	// 256MB at 0
-	};
-
-	soc8349@e0000000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		ranges = <0x0 0xe0000000 0x00100000>;
-		reg = <0xe0000000 0x00000200>;
-		bus-frequency = <0>;
-
-		wdt@200 {
-			compatible = "mpc83xx_wdt";
-			reg = <0x200 0x100>;
-		};
-
-		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <14 0x8>;
-			interrupt-parent = <&ipic>;
-			dfsrr;
-		};
-
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <15 0x8>;
-			interrupt-parent = <&ipic>;
-			dfsrr;
-		};
-
-		spi@7000 {
-			cell-index = <0>;
-			compatible = "fsl,spi";
-			reg = <0x7000 0x1000>;
-			interrupts = <16 0x8>;
-			interrupt-parent = <&ipic>;
-			mode = "cpu";
-		};
-
-		dma@82a8 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
-			reg = <0x82a8 4>;
-			ranges = <0 0x8100 0x1a8>;
-			interrupt-parent = <&ipic>;
-			interrupts = <71 8>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
-				reg = <0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&ipic>;
-				interrupts = <71 8>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&ipic>;
-				interrupts = <71 8>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&ipic>;
-				interrupts = <71 8>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
-				reg = <0x180 0x28>;
-				cell-index = <3>;
-				interrupt-parent = <&ipic>;
-				interrupts = <71 8>;
-			};
-		};
-
-		/* phy type (ULPI or SERIAL) are only types supported for MPH */
-		/* port = 0 or 1 */
-		usb@22000 {
-			compatible = "fsl-usb2-mph";
-			reg = <0x22000 0x1000>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			interrupt-parent = <&ipic>;
-			interrupts = <39 0x8>;
-			phy_type = "ulpi";
-			port0;
-		};
-
-		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "TSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <32 0x8 33 0x8 34 0x8>;
-			interrupt-parent = <&ipic>;
-			tbi-handle = <&tbi0>;
-			phy-handle = <&phy0>;
-			linux,network-index = <0>;
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@19 {
-					interrupt-parent = <&ipic>;
-					interrupts = <20 0x8>;
-					reg = <0x19>;
-				};
-
-				phy1: ethernet-phy@1a {
-					interrupt-parent = <&ipic>;
-					interrupts = <21 0x8>;
-					reg = <0x1a>;
-				};
-
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "TSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 0x8 36 0x8 37 0x8>;
-			interrupt-parent = <&ipic>;
-			tbi-handle = <&tbi1>;
-			phy-handle = <&phy1>;
-			linux,network-index = <1>;
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi1: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "fsl,ns16550", "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <9 0x8>;
-			interrupt-parent = <&ipic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "fsl,ns16550", "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <10 0x8>;
-			interrupt-parent = <&ipic>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <11 0x8>;
-			interrupt-parent = <&ipic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0x7e>;
-			fsl,descriptor-types-mask = <0x01010ebf>;
-		};
-
-		/* IPIC
-		 * interrupts cell = <intr #, sense>
-		 * sense values match linux IORESOURCE_IRQ_* defines:
-		 * sense == 8: Level, low assertion
-		 * sense == 2: Edge, high-to-low change
-		 */
-		ipic: pic@700 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x700 0x100>;
-			device_type = "ipic";
-		};
-	};
-
-	localbus@e0005000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,mpc8349-localbus", "simple-bus";
-		reg = <0xe0005000 0x1000>;
-		interrupts = <77 0x8>;
-		interrupt-parent = <&ipic>;
-		ranges = <0x0 0x0 0xff800000 0x00800000		/* 8MB Flash */
-			  0x1 0x0 0xf8000000 0x00002000		/* 8KB EEPROM */
-			  0x2 0x0 0x10000000 0x04000000		/* 64MB SDRAM */
-			  0x3 0x0 0x10000000 0x04000000>;	/* 64MB SDRAM */
-
-		flash@0,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "intel,28F640J3A", "cfi-flash";
-			reg = <0x0 0x0 0x800000>;
-			bank-width = <2>;
-			device-width = <1>;
-
-			partition@0 {
-				label = "u-boot";
-				reg = <0x00000000 0x00040000>;
-				read-only;
-			};
-
-			partition@40000 {
-				label = "user";
-				reg = <0x00040000 0x006c0000>;
-			};
-
-			partition@700000 {
-				label = "legacy u-boot";
-				reg = <0x00700000 0x00100000>;
-				read-only;
-			};
-
-		};
-	};
-
-	pci0: pci@e0008500 {
-		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
-		interrupt-map = <
-
-				/* IDSEL 0x11 */
-				 0x8800 0x0 0x0 0x1 &ipic 48 0x8
-				 0x8800 0x0 0x0 0x2 &ipic 17 0x8
-				 0x8800 0x0 0x0 0x3 &ipic 18 0x8
-				 0x8800 0x0 0x0 0x4 &ipic 19 0x8>;
-
-		interrupt-parent = <&ipic>;
-		interrupts = <0x42 0x8>;
-		bus-range = <0 0>;
-		ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
-			  0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
-			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
-		clock-frequency = <66666666>;
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0xe0008500 0x100		/* internal registers */
-		       0xe0008300 0x8>;		/* config space access registers */
-		compatible = "fsl,mpc8349-pci";
-		device_type = "pci";
-	};
-};
diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig
deleted file mode 100644
index 7d74699334da5..0000000000000
--- a/arch/powerpc/configs/83xx/sbc834x_defconfig
+++ /dev/null
@@ -1,74 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-# CONFIG_KALLSYMS is not set
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_83xx=y
-CONFIG_SBC834x=y
-CONFIG_GEN_RTC=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_SCSI=y
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=y
-# CONFIG_SCSI_LOWLEVEL is not set
-CONFIG_NETDEVICES=y
-CONFIG_GIANFAR=y
-CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-# CONFIG_USB_HID is not set
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_FSL=y
-CONFIG_USB_STORAGE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT4_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 7e38b7b71a5a8..071f53b0c0a01 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -90,13 +90,6 @@ config MPC837x_RDB
 	help
 	  This option enables support for the MPC837x RDB and WLAN Boards.
 
-config SBC834x
-	bool "Wind River SBC834x"
-	select DEFAULT_UIMAGE
-	select PPC_MPC834x
-	help
-	  This option enables support for the Wind River SBC834x board.
-
 config ASP834x
 	bool "Analogue & Micro ASP 834x"
 	select PPC_MPC834x
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
index bb4720897f6a0..41cb5f842eff1 100644
--- a/arch/powerpc/platforms/83xx/Makefile
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_MPC836x_MDS)	+= mpc836x_mds.o
 obj-$(CONFIG_MPC836x_RDK)	+= mpc836x_rdk.o
 obj-$(CONFIG_MPC832x_MDS)	+= mpc832x_mds.o
 obj-$(CONFIG_MPC837x_MDS)	+= mpc837x_mds.o
-obj-$(CONFIG_SBC834x)		+= sbc834x.o
 obj-$(CONFIG_MPC837x_RDB)	+= mpc837x_rdb.o
 obj-$(CONFIG_ASP834x)		+= asp834x.o
 obj-$(CONFIG_KMETER1)		+= km83xx.o
diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c
deleted file mode 100644
index cb4bdabfdf1cd..0000000000000
--- a/arch/powerpc/platforms/83xx/sbc834x.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * arch/powerpc/platforms/83xx/sbc834x.c
- *
- * Wind River SBC834x board specific routines
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the mpc834x_mds.c support by Kumar Gala.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/root_dev.h>
-#include <linux/of_platform.h>
-
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc83xx.h"
-
-/* ************************************************************************
- *
- * Setup the architecture
- *
- */
-static void __init sbc834x_setup_arch(void)
-{
-	mpc83xx_setup_arch();
-}
-
-machine_device_initcall(sbc834x, mpc83xx_declare_of_platform_devices);
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init sbc834x_probe(void)
-{
-	return of_machine_is_compatible("SBC834xE");
-}
-
-define_machine(sbc834x) {
-	.name			= "SBC834xE",
-	.probe			= sbc834x_probe,
-	.setup_arch		= sbc834x_setup_arch,
-	.init_IRQ		= mpc83xx_ipic_init_IRQ,
-	.get_irq		= ipic_get_irq,
-	.restart		= mpc83xx_restart,
-	.time_init		= mpc83xx_time_init,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-};

From 0ea5ee035133aeb549883ddc604a6507c9493b9e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 2 May 2018 21:29:48 +1000
Subject: [PATCH 013/251] tracing: Remove PPC32 wart from config
 TRACING_SUPPORT

config TRACING_SUPPORT has an exception for PPC32, because PPC32
didn't have irqflags tracing support.

But that hasn't been true since commit 5d38902c4838 ("powerpc: Add
irqtrace support for 32-bit powerpc") (Jun 2009).

So remove the exception for PPC32 and the comment.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 kernel/trace/Kconfig | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index c4f0f2e4126e4..dd6c0a2ad969f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -110,11 +110,7 @@ config GENERIC_TRACER
 #
 config TRACING_SUPPORT
 	bool
-	# PPC32 has no irqflags tracing support, but it can use most of the
-	# tracers anyway, they were tested to build and work. Note that new
-	# exceptions to this list aren't welcomed, better implement the
-	# irqflags tracing for your architecture.
-	depends on TRACE_IRQFLAGS_SUPPORT || PPC32
+	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on STACKTRACE_SUPPORT
 	default y
 

From b71a693d3db3abd1ddf7d29be967a1180c3ebb22 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Tue, 10 Apr 2018 19:11:16 +0530
Subject: [PATCH 014/251] powerpc/fadump: exclude memory holes while reserving
 memory in second kernel

The second kernel, during early boot after the crash, reserves rest of
the memory above boot memory size to make sure it does not touch any of the
dump memory area. It uses memblock_reserve() that reserves the specified
memory region irrespective of memory holes present within that region.
There are chances where previous kernel would have hot removed some of
its memory leaving memory holes behind. In such cases fadump kernel reports
incorrect number of reserved pages through arch_reserved_kernel_pages()
hook causing kernel to hang or panic.

Fix this by excluding memory holes while reserving rest of the memory
above boot memory size during second kernel boot after crash.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/fadump.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3c2c2688918ff..bea8d5fe3b6ee 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -335,6 +335,26 @@ static unsigned long get_fadump_area_size(void)
 	return size;
 }
 
+static void __init fadump_reserve_crash_area(unsigned long base,
+					     unsigned long size)
+{
+	struct memblock_region *reg;
+	unsigned long mstart, mend, msize;
+
+	for_each_memblock(memory, reg) {
+		mstart = max_t(unsigned long, base, reg->base);
+		mend = reg->base + reg->size;
+		mend = min(base + size, mend);
+
+		if (mstart < mend) {
+			msize = mend - mstart;
+			memblock_reserve(mstart, msize);
+			pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
+				(msize >> 20), mstart);
+		}
+	}
+}
+
 int __init fadump_reserve_mem(void)
 {
 	unsigned long base, size, memory_boundary;
@@ -380,7 +400,8 @@ int __init fadump_reserve_mem(void)
 		memory_boundary = memblock_end_of_DRAM();
 
 	if (fw_dump.dump_active) {
-		printk(KERN_INFO "Firmware-assisted dump is active.\n");
+		pr_info("Firmware-assisted dump is active.\n");
+
 		/*
 		 * If last boot has crashed then reserve all the memory
 		 * above boot_memory_size so that we don't touch it until
@@ -389,11 +410,7 @@ int __init fadump_reserve_mem(void)
 		 */
 		base = fw_dump.boot_memory_size;
 		size = memory_boundary - base;
-		memblock_reserve(base, size);
-		printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
-				"for saving crash dump\n",
-				(unsigned long)(size >> 20),
-				(unsigned long)(base >> 20));
+		fadump_reserve_crash_area(base, size);
 
 		fw_dump.fadumphdr_addr =
 				be64_to_cpu(fdm_active->rmr_region.destination_address) +

From 8597538712ebd90bc83dfb0b3b40398a0c53ad5b Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.vnet.ibm.com>
Date: Tue, 10 Apr 2018 19:11:31 +0530
Subject: [PATCH 015/251] powerpc/fadump: Do not use hugepages when fadump is
 active

FADump capture kernel boots in restricted memory environment preserving
the context of previous kernel to save vmcore. Supporting hugepages in
such environment makes things unnecessarily complicated, as hugepages
need memory set aside for them. This means most of the capture kernel's
memory is used in supporting hugepages. In most cases, this results in
out-of-memory issues while booting FADump capture kernel. But hugepages
are not of much use in capture kernel whose only job is to save vmcore.
So, disabling hugepages support, when fadump is active, is a reliable
solution for the out of memory issues. Introducing a flag variable to
disable HugeTLB support when fadump is active.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/page.h | 1 +
 arch/powerpc/kernel/fadump.c    | 8 ++++++++
 arch/powerpc/mm/hash_utils_64.c | 6 ++++--
 arch/powerpc/mm/hugetlbpage.c   | 7 +++++++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index dec9ce5ba8afa..db7be0779d550 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_HUGETLB_PAGE
+extern bool hugetlb_disabled;
 extern unsigned int HPAGE_SHIFT;
 #else
 #define HPAGE_SHIFT PAGE_SHIFT
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index bea8d5fe3b6ee..8ceabef40d3d4 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -402,6 +402,14 @@ int __init fadump_reserve_mem(void)
 	if (fw_dump.dump_active) {
 		pr_info("Firmware-assisted dump is active.\n");
 
+#ifdef CONFIG_HUGETLB_PAGE
+		/*
+		 * FADump capture kernel doesn't care much about hugepages.
+		 * In fact, handling hugepages in capture kernel is asking for
+		 * trouble. So, disable HugeTLB support when fadump is active.
+		 */
+		hugetlb_disabled = true;
+#endif
 		/*
 		 * If last boot has crashed then reserve all the memory
 		 * above boot_memory_size so that we don't touch it until
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0bd3790d35df4..5beeec6fbb9be 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -572,8 +572,10 @@ static void __init htab_scan_page_sizes(void)
 	}
 
 #ifdef CONFIG_HUGETLB_PAGE
-	/* Reserve 16G huge page memory sections for huge pages */
-	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+	if (!hugetlb_disabled) {
+		/* Reserve 16G huge page memory sections for huge pages */
+		of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+	}
 #endif /* CONFIG_HUGETLB_PAGE */
 }
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f1153f8254e3d..2a4b1bf8bde6c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -35,6 +35,8 @@
 #define PAGE_SHIFT_16M	24
 #define PAGE_SHIFT_16G	34
 
+bool hugetlb_disabled = false;
+
 unsigned int HPAGE_SHIFT;
 EXPORT_SYMBOL(HPAGE_SHIFT);
 
@@ -651,6 +653,11 @@ static int __init hugetlbpage_init(void)
 {
 	int psize;
 
+	if (hugetlb_disabled) {
+		pr_info("HugeTLB support is disabled!\n");
+		return 0;
+	}
+
 #if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
 	if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;

From 722cde76d68e8cc4f3de42e71c82fd40dea4f7b9 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Fri, 27 Apr 2018 11:53:18 +0530
Subject: [PATCH 016/251] powerpc/fadump: Unregister fadump on kexec down path.

Unregister fadump on kexec down path otherwise the fadump registration
in new kexec-ed kernel complains that fadump is already registered.
This makes new kernel to continue using fadump registered by previous
kernel which may lead to invalid vmcore generation. Hence this patch
fixes this issue by un-registering fadump in fadump_cleanup() which is
called during kexec path so that new kernel can register fadump with
new valid values.

Fixes: b500afff11f6 ("fadump: Invalidate registration and release reserved memory for general use.")
Cc: stable@vger.kernel.org # v3.4+
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/fadump.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ceabef40d3d4..07e8396d472bd 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1180,6 +1180,9 @@ void fadump_cleanup(void)
 		init_fadump_mem_struct(&fdm,
 			be64_to_cpu(fdm_active->cpu_state_data.destination_address));
 		fadump_invalidate_dump(&fdm);
+	} else if (fw_dump.dump_registered) {
+		/* Un-register Firmware-assisted dump if it was registered. */
+		fadump_unregister_dump(&fdm);
 	}
 }
 

From 45201c8794693d16e8e634188a92e9c0b075fd91 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 24 Apr 2018 18:31:28 +0200
Subject: [PATCH 017/251] powerpc/nohash: Remove hash related code from nohash
 headers.

When nohash and book3s header were split, some hash related stuff
remained in the nohash header. This patch removes them.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Duplicate pte_young() to avoid circular header dependency]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 34 +++++------------
 arch/powerpc/include/asm/nohash/64/pgtable.h | 21 ++++-------
 arch/powerpc/include/asm/nohash/pgtable.h    | 39 ++------------------
 arch/powerpc/include/asm/nohash/pte-book3e.h |  1 -
 4 files changed, 20 insertions(+), 75 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 03bbd1149530d..987a658b18e1c 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -133,7 +133,7 @@ extern int icache_44x_need_flush;
 #ifndef __ASSEMBLY__
 
 #define pte_clear(mm, addr, ptep) \
-	do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
+	do { pte_update(ptep, ~0, 0); } while (0)
 
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
@@ -145,21 +145,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 
 
-/*
- * When flushing the tlb entry for a page, we also need to flush the hash
- * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
- */
-extern int flush_hash_pages(unsigned context, unsigned long va,
-			    unsigned long pmdval, int count);
-
-/* Add an HPTE to the hash table */
-extern void add_hash_page(unsigned context, unsigned long va,
-			  unsigned long pmdval);
-
-/* Flush an entry from the TLB/hash table */
-extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
-			     unsigned long address);
-
 /*
  * PTE updates. This function is called whenever an existing
  * valid PTE is updated. This does -not- include set_pte_at()
@@ -246,12 +231,6 @@ static inline int __ptep_test_and_clear_young(unsigned int context, unsigned lon
 {
 	unsigned long old;
 	old = pte_update(ptep, _PAGE_ACCESSED, 0);
-#if _PAGE_HASHPTE != 0
-	if (old & _PAGE_HASHPTE) {
-		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
-		flush_hash_pages(context, addr, ptephys, 1);
-	}
-#endif
 	return (old & _PAGE_ACCESSED) != 0;
 }
 #define ptep_test_and_clear_young(__vma, __addr, __ptep) \
@@ -261,7 +240,7 @@ static inline int __ptep_test_and_clear_young(unsigned int context, unsigned lon
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 				       pte_t *ptep)
 {
-	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
+	return __pte(pte_update(ptep, ~0, 0));
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -288,8 +267,13 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 	pte_update(ptep, clr, set);
 }
 
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_ACCESSED;
+}
+
 #define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
+#define pte_same(A,B)	((pte_val(A) ^ pte_val(B)) == 0)
 
 /*
  * Note that on Book E processors, the pmd contains the kernel virtual
@@ -330,7 +314,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 /*
  * Encode and decode a swap entry.
  * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
+ * must not include the _PAGE_PRESENT bit.
  *   -- paulus
  */
 #define __swp_type(entry)		((entry).val & 0x1f)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 5c5f75d005ada..ef3fdfc04f927 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -173,8 +173,6 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 /* to find an entry in a kernel page-table-directory */
 /* This now only contains the vmalloc pages */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
-extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
-			    pte_t *ptep, unsigned long pte, int huge);
 
 /* Atomic PTE updates */
 static inline unsigned long pte_update(struct mm_struct *mm,
@@ -205,20 +203,20 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	if (!huge)
 		assert_pte_locked(mm, addr);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-	if (old & _PAGE_HASHPTE)
-		hpte_need_flush(mm, addr, ptep, old, huge);
-#endif
-
 	return old;
 }
 
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_ACCESSED;
+}
+
 static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 					      unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
-	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+	if (pte_young(*ptep))
 		return 0;
 	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
 	return (old & _PAGE_ACCESSED) != 0;
@@ -312,7 +310,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 }
 
 #define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+#define pte_same(A,B)	((pte_val(A) ^ pte_val(B)) == 0)
 
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -324,11 +322,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 /* Encode and de-code a swap entry */
 #define MAX_SWAPFILES_CHECK() do { \
 	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
-	/*							\
-	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
-	 * We filter HPTEFLAGS on set_pte.			\
-	 */							\
-	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
 	} while (0)
 /*
  * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index c56de1e8026f6..36a1ea6a78060 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -17,7 +17,6 @@ static inline int pte_write(pte_t pte)
 }
 static inline int pte_read(pte_t pte)		{ return 1; }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
 static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
 static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
@@ -148,37 +147,16 @@ extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, pte_t pte, int percpu)
 {
-#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
-	 * helper pte_update() which does an atomic update. We need to do that
-	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
-	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
-	 * the hash bits instead (ie, same as the non-SMP case)
-	 */
-	if (percpu)
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-	else
-		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
-
-#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
 	/* Second case is 32-bit with 64-bit PTE.  In this case, we
 	 * can just store as long as we do the two halves in the right order
-	 * with a barrier in between. This is possible because we take care,
-	 * in the hash code, to pre-invalidate if the PTE was already hashed,
-	 * which synchronizes us with any concurrent invalidation.
-	 * In the percpu case, we also fallback to the simple update preserving
-	 * the hash bits
+	 * with a barrier in between.
+	 * In the percpu case, we also fallback to the simple update
 	 */
 	if (percpu) {
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+		*ptep = pte;
 		return;
 	}
-#if _PAGE_HASHPTE != 0
-	if (pte_val(*ptep) & _PAGE_HASHPTE)
-		flush_hash_entry(mm, ptep, addr);
-#endif
 	__asm__ __volatile__("\
 		stw%U0%X0 %2,%0\n\
 		eieio\n\
@@ -186,15 +164,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
 	: "r" (pte) : "memory");
 
-#elif defined(CONFIG_PPC_STD_MMU_32)
-	/* Third case is 32-bit hash table in UP mode, we need to preserve
-	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
-	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
-	 * and see we need to keep track that this PTE needs invalidating
-	 */
-	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-		      | (pte_val(pte) & ~_PAGE_HASHPTE));
-
 #else
 	/* Anything else just stores the PTE normally. That covers all 64-bit
 	 * cases, and 32-bit non-hash with 32-bit PTEs.
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index ccee8eb509bb7..9ff51b4c0cacd 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -57,7 +57,6 @@
 #define _PAGE_USER		(_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
 #define _PAGE_PRIVILEGED	(_PAGE_BAP_SR)
 
-#define _PAGE_HASHPTE	0
 #define _PAGE_BUSY	0
 
 #define _PAGE_SPECIAL	_PAGE_SW0

From 0d594aa75f5e9f48c5365708f9b7d286a73ede53 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 24 Apr 2018 18:31:30 +0200
Subject: [PATCH 018/251] powerpc/nohash: Remove _PAGE_BUSY

_PAGE_BUSY is always 0, remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 +++-------
 arch/powerpc/include/asm/nohash/pte-book3e.h |  5 -----
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index ef3fdfc04f927..6ac8381f4c7af 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -186,14 +186,12 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3		# pte_update\n\
-	andi.	%1,%0,%6\n\
-	bne-	1b \n\
 	andc	%1,%0,%4 \n\
-	or	%1,%1,%7\n\
+	or	%1,%1,%6\n\
 	stdcx.	%1,0,%3 \n\
 	bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "r" (set)
 	: "cc" );
 #else
 	unsigned long old = pte_val(*ptep);
@@ -295,13 +293,11 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%4\n\
-		andi.	%1,%0,%6\n\
-		bne-	1b \n\
 		or	%0,%3,%0\n\
 		stdcx.	%0,0,%4\n\
 		bne-	1b"
 	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+	:"r" (bits), "r" (ptep), "m" (*ptep)
 	:"cc");
 #else
 	unsigned long old = pte_val(*ptep);
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index 9ff51b4c0cacd..12730b81cd984 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -57,13 +57,8 @@
 #define _PAGE_USER		(_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
 #define _PAGE_PRIVILEGED	(_PAGE_BAP_SR)
 
-#define _PAGE_BUSY	0
-
 #define _PAGE_SPECIAL	_PAGE_SW0
 
-/* Flags to be preserved on PTE modifications */
-#define _PAGE_HPTEFLAGS	_PAGE_BUSY
-
 /* Base page size */
 #ifdef CONFIG_PPC_64K_PAGES
 #define _PAGE_PSIZE	_PAGE_PSIZE_64K

From d5808ffaec817ff28e10c9970e1b0260c8c50bc4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 24 Apr 2018 18:31:32 +0200
Subject: [PATCH 019/251] powerpc/nohash: Use IS_ENABLED() to simplify
 __set_pte_at()

By using IS_ENABLED() we can simplify __set_pte_at() by removing
redundant *ptep = pte.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/pgtable.h | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 36a1ea6a78060..2160be2e4339d 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -147,40 +147,33 @@ extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, pte_t pte, int percpu)
 {
-#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
 	/* Second case is 32-bit with 64-bit PTE.  In this case, we
 	 * can just store as long as we do the two halves in the right order
 	 * with a barrier in between.
 	 * In the percpu case, we also fallback to the simple update
 	 */
-	if (percpu) {
-		*ptep = pte;
+	if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) {
+		__asm__ __volatile__("\
+			stw%U0%X0 %2,%0\n\
+			eieio\n\
+			stw%U0%X0 %L2,%1"
+		: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+		: "r" (pte) : "memory");
 		return;
 	}
-	__asm__ __volatile__("\
-		stw%U0%X0 %2,%0\n\
-		eieio\n\
-		stw%U0%X0 %L2,%1"
-	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-	: "r" (pte) : "memory");
-
-#else
 	/* Anything else just stores the PTE normally. That covers all 64-bit
 	 * cases, and 32-bit non-hash with 32-bit PTEs.
 	 */
 	*ptep = pte;
 
-#ifdef CONFIG_PPC_BOOK3E_64
 	/*
 	 * With hardware tablewalk, a sync is needed to ensure that
 	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
 	 * mappings, we can't tolerate spurious faults, so make sure
 	 * the new PTE will be seen the first time.
 	 */
-	if (is_kernel_addr(addr))
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64) && is_kernel_addr(addr))
 		mb();
-#endif
-#endif
 }
 
 

From 7acf50e4efa60270edcb95107f660f5e258a90f2 Mon Sep 17 00:00:00 2001
From: Balbir Singh <bsingharora@gmail.com>
Date: Tue, 1 May 2018 12:57:25 +1000
Subject: [PATCH 020/251] Revert "powerpc/powernv: Increase memory block size
 to 1GB on radix"

This commit was a stop-gap to prevent crashes on hotunplug, caused by
the mismatch between the 1G mappings used for the linear mapping and the
memory block size. Those issues are now resolved because we split the
linear mapping at hotunplug time if necessary, as implemented in commit
4dd5f8a99e79 ("powerpc/mm/radix: Split linear mapping on hot-unplug").

Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Tested-by: Rashmica Gupta <rashmica.g@gmail.com>
Tested-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/setup.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index ef8c9ce53a616..fa63d3fff14c8 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -356,15 +356,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static unsigned long pnv_memory_block_size(void)
 {
-	/*
-	 * We map the kernel linear region with 1GB large pages on radix. For
-	 * memory hot unplug to work our memory block size must be at least
-	 * this size.
-	 */
-	if (radix_enabled())
-		return 1UL * 1024 * 1024 * 1024;
-	else
-		return 256UL * 1024 * 1024;
+	return 256UL * 1024 * 1024;
 }
 #endif
 

From 5212213aa5a2359dd0474c9dab22b6220b591fe1 Mon Sep 17 00:00:00 2001
From: Ram Pai <linuxram@us.ibm.com>
Date: Tue, 27 Mar 2018 02:09:26 -0700
Subject: [PATCH 021/251] mm, powerpc, x86: define VM_PKEY_BITx bits if
 CONFIG_ARCH_HAS_PKEYS is enabled

VM_PKEY_BITx are defined only if CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
is enabled. Powerpc also needs these bits. Hence lets define the
VM_PKEY_BITx bits for any architecture that enables
CONFIG_ARCH_HAS_PKEYS.

Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/pkeys.h | 2 ++
 fs/proc/task_mmu.c               | 4 ++--
 include/linux/mm.h               | 9 +++++----
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index 0409c80c32c0a..18ef59a9886d7 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -26,6 +26,8 @@ extern u32 initial_allocation_mask; /* bits set for reserved keys */
 # define VM_PKEY_BIT2	VM_HIGH_ARCH_2
 # define VM_PKEY_BIT3	VM_HIGH_ARCH_3
 # define VM_PKEY_BIT4	VM_HIGH_ARCH_4
+#elif !defined(VM_PKEY_BIT4)
+# define VM_PKEY_BIT4	VM_HIGH_ARCH_4
 #endif
 
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c486ad4b43f00..541392a626089 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -673,13 +673,13 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_MERGEABLE)]	= "mg",
 		[ilog2(VM_UFFD_MISSING)]= "um",
 		[ilog2(VM_UFFD_WP)]	= "uw",
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_ARCH_HAS_PKEYS
 		/* These come out via ProtectionKey: */
 		[ilog2(VM_PKEY_BIT0)]	= "",
 		[ilog2(VM_PKEY_BIT1)]	= "",
 		[ilog2(VM_PKEY_BIT2)]	= "",
 		[ilog2(VM_PKEY_BIT3)]	= "",
-#endif
+#endif /* CONFIG_ARCH_HAS_PKEYS */
 	};
 	size_t i;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ac1f06a4be6b..c6a6f2492c1b3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -228,15 +228,16 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
-#if defined(CONFIG_X86)
-# define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
-#if defined (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)
+#ifdef CONFIG_ARCH_HAS_PKEYS
 # define VM_PKEY_SHIFT	VM_HIGH_ARCH_BIT_0
 # define VM_PKEY_BIT0	VM_HIGH_ARCH_0	/* A protection key is a 4-bit value */
 # define VM_PKEY_BIT1	VM_HIGH_ARCH_1
 # define VM_PKEY_BIT2	VM_HIGH_ARCH_2
 # define VM_PKEY_BIT3	VM_HIGH_ARCH_3
-#endif
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+
+#if defined(CONFIG_X86)
+# define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
 #elif defined(CONFIG_PPC)
 # define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)

From 2c9e0a6fa2bb75697981825153128f43b32f6d08 Mon Sep 17 00:00:00 2001
From: Ram Pai <linuxram@us.ibm.com>
Date: Tue, 27 Mar 2018 02:09:27 -0700
Subject: [PATCH 022/251] mm, powerpc, x86: introduce an additional vma bit for
 powerpc pkey

Currently only 4bits are allocated in the vma flags to hold 16
keys. This is sufficient for x86. PowerPC  supports  32  keys,
which needs 5bits. This patch allocates an  additional bit.

Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
[mpe: Fold in #if VM_PKEY_BIT4 as noticed by Dave Hansen]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 fs/proc/task_mmu.c | 3 +++
 include/linux/mm.h | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 541392a626089..c2163606e6fba 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -679,6 +679,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_PKEY_BIT1)]	= "",
 		[ilog2(VM_PKEY_BIT2)]	= "",
 		[ilog2(VM_PKEY_BIT3)]	= "",
+#if VM_PKEY_BIT4
+		[ilog2(VM_PKEY_BIT4)]	= "",
+#endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 	};
 	size_t i;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c6a6f2492c1b3..fc25929031c0d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -231,9 +231,14 @@ extern unsigned int kobjsize(const void *objp);
 #ifdef CONFIG_ARCH_HAS_PKEYS
 # define VM_PKEY_SHIFT	VM_HIGH_ARCH_BIT_0
 # define VM_PKEY_BIT0	VM_HIGH_ARCH_0	/* A protection key is a 4-bit value */
-# define VM_PKEY_BIT1	VM_HIGH_ARCH_1
+# define VM_PKEY_BIT1	VM_HIGH_ARCH_1	/* on x86 and 5-bit value on ppc64   */
 # define VM_PKEY_BIT2	VM_HIGH_ARCH_2
 # define VM_PKEY_BIT3	VM_HIGH_ARCH_3
+#ifdef CONFIG_PPC
+# define VM_PKEY_BIT4  VM_HIGH_ARCH_4
+#else
+# define VM_PKEY_BIT4  0
+#endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 
 #if defined(CONFIG_X86)

From cd419a513146367af08b895a8f7d360e4e77b638 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 10 Apr 2018 13:12:20 +1000
Subject: [PATCH 023/251] mm/pkeys: Remove include of asm/mmu_context.h from
 pkeys.h

While trying to unify the pkey handling in show_smap() between x86 and
powerpc we stumbled across various build failures due to the order of
includes between the two arches.

Part of the problem is that linux/pkeys.h includes asm/mmu_context.h,
and the relationship between asm/mmu_context.h and asm/pkeys.h is not
consistent between the two arches.

It would be cleaner if linux/pkeys.h only included asm/pkeys.h,
creating a single integration point for the arch pkey definitions.

So this patch removes the include of asm/mmu_context.h from
linux/pkeys.h.

We can't prove that this is safe in the general case, but it passes
all the build tests I've thrown at it. Also asm/mmu_context.h is
included widely while linux/pkeys.h is not, so most likely any code
that is including linux/pkeys.h is already getting asm/mmu_context.h
from elsewhere.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 include/linux/pkeys.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 0794ca78c379a..ed06e1a67bfaa 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -3,7 +3,6 @@
 #define _LINUX_PKEYS_H
 
 #include <linux/mm_types.h>
-#include <asm/mmu_context.h>
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
 #include <asm/pkeys.h>

From dbec10e58deadba596d59a0ab4a394fef271992f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 11 Apr 2018 23:41:42 +1000
Subject: [PATCH 024/251] mm/pkeys, powerpc, x86: Provide an empty vma_pkey()
 in linux/pkeys.h

Consolidate the pkey handling by providing a common empty definition
of vma_pkey() in pkeys.h when CONFIG_ARCH_HAS_PKEYS=n.

This also removes another entanglement of pkeys.h and
asm/mmu_context.h.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/powerpc/include/asm/mmu_context.h | 5 -----
 arch/x86/include/asm/mmu_context.h     | 5 -----
 include/linux/pkeys.h                  | 5 +++++
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 1835ca1505d6b..896efa5599969 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -250,11 +250,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 #define thread_pkey_regs_restore(new_thread, old_thread)
 #define thread_pkey_regs_init(thread)
 
-static inline int vma_pkey(struct vm_area_struct *vma)
-{
-	return 0;
-}
-
 static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
 {
 	return 0x0UL;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 57e3785d0d26e..3d748bdf44a7a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -296,11 +296,6 @@ static inline int vma_pkey(struct vm_area_struct *vma)
 
 	return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
 }
-#else
-static inline int vma_pkey(struct vm_area_struct *vma)
-{
-	return 0;
-}
 #endif
 
 /*
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index ed06e1a67bfaa..aad54663763b7 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -13,6 +13,11 @@
 #define PKEY_DEDICATED_EXECUTE_ONLY 0
 #define ARCH_VM_PKEY_FLAGS 0
 
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+	return 0;
+}
+
 static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
 	return (pkey == 0);

From 555934a71bb479ce109722807b374f2d98aefe89 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 12 Apr 2018 23:54:00 +1000
Subject: [PATCH 025/251] x86/pkeys: Move vma_pkey() into asm/pkeys.h

Move the last remaining pkey helper, vma_pkey() into asm/pkeys.h

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/mmu_context.h | 10 ----------
 arch/x86/include/asm/pkeys.h       |  8 ++++++++
 include/linux/pkeys.h              |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 3d748bdf44a7a..59e1fadef401a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -288,16 +288,6 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 		mpx_notify_unmap(mm, vma, start, end);
 }
 
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-static inline int vma_pkey(struct vm_area_struct *vma)
-{
-	unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
-				      VM_PKEY_BIT2 | VM_PKEY_BIT3;
-
-	return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
-}
-#endif
-
 /*
  * We only want to enforce protection keys on the current process
  * because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index a0ba1ffda0dfd..0e5f749158e45 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -106,4 +106,12 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 		unsigned long init_val);
 extern void copy_init_pkru_to_fpregs(void);
 
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+	unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
+				      VM_PKEY_BIT2 | VM_PKEY_BIT3;
+
+	return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
+}
+
 #endif /*_ASM_X86_PKEYS_H */
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index aad54663763b7..946cb773b79f1 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_PKEYS_H
 #define _LINUX_PKEYS_H
 
-#include <linux/mm_types.h>
+#include <linux/mm.h>
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
 #include <asm/pkeys.h>

From 3f36c94239658a79a4deeb84a2e0ebf005f083fc Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 13 Apr 2018 23:54:36 +1000
Subject: [PATCH 026/251] x86/pkeys: Add arch_pkeys_enabled()

This will be used in future patches to check for arch support for
pkeys in generic code.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/pkeys.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 0e5f749158e45..c1957f8f7c1be 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -7,6 +7,11 @@
 extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 		unsigned long init_val);
 
+static inline bool arch_pkeys_enabled(void)
+{
+	return boot_cpu_has(X86_FEATURE_OSPKE);
+}
+
 /*
  * Try to dedicate one of the protection keys to be used as an
  * execute-only protection key.

From 4414ef9536c398ebc4e1443d59fbae8fea53732b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 13 Apr 2018 23:54:52 +1000
Subject: [PATCH 027/251] mm/pkeys: Add an empty arch_pkeys_enabled()

Add an empty arch_pkeys_enabled() in linux/pkeys.h for the
CONFIG_ARCH_HAS_PKEYS=n case.

Split out of a patch by Ram Pai <linuxram@us.ibm.com>.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 include/linux/pkeys.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 946cb773b79f1..2955ba9760489 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -39,6 +39,11 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	return 0;
 }
 
+static inline bool arch_pkeys_enabled(void)
+{
+	return false;
+}
+
 static inline void copy_init_pkru_to_fpregs(void)
 {
 }

From 27cca866e3fce0961e13b38b87e5322fe153e437 Mon Sep 17 00:00:00 2001
From: Ram Pai <linuxram@us.ibm.com>
Date: Fri, 13 Apr 2018 23:55:07 +1000
Subject: [PATCH 028/251] mm/pkeys, x86, powerpc: Display pkey in smaps if arch
 supports pkeys

Currently the architecture specific code is expected to display the
protection keys in smap for a given vma. This can lead to redundant
code and possibly to divergent formats in which the key gets
displayed.

This patch changes the implementation. It displays the pkey only if
the architecture support pkeys, i.e arch_pkeys_enabled() returns true.

x86 arch_show_smap() function is not needed anymore, delete it.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
[mpe: Split out from larger patch, rebased on header changes]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/kernel/setup.c | 8 --------
 fs/proc/task_mmu.c      | 8 +++-----
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5c623dfe39d15..2f86d883dd950 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1312,11 +1312,3 @@ static int __init register_kernel_offset_dumper(void)
 	return 0;
 }
 __initcall(register_kernel_offset_dumper);
-
-void arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
-{
-	if (!boot_cpu_has(X86_FEATURE_OSPKE))
-		return;
-
-	seq_printf(m, "ProtectionKey:  %8u\n", vma_pkey(vma));
-}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c2163606e6fba..93cea7b07a80d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -18,6 +18,7 @@
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
 #include <linux/uaccess.h>
+#include <linux/pkeys.h>
 
 #include <asm/elf.h>
 #include <asm/tlb.h>
@@ -730,10 +731,6 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 }
 #endif /* HUGETLB_PAGE */
 
-void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
-{
-}
-
 #define SEQ_PUT_DEC(str, val) \
 		seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
@@ -838,7 +835,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		seq_puts(m, " kB\n");
 	}
 	if (!rollup_mode) {
-		arch_show_smap(m, vma);
+		if (arch_pkeys_enabled())
+			seq_printf(m, "ProtectionKey:  %8u\n", vma_pkey(vma));
 		show_smap_vma_flags(m, vma);
 	}
 	m_cache_vma(m, vma);

From d86a4ba02fc6bea4670b2f59b208152a4a34c3c0 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 10 May 2018 19:46:10 +1000
Subject: [PATCH 029/251] powerpc/pkeys: Drop private VM_PKEY definitions

Now that we've updated the generic headers to support 5 PKEY bits for
powerpc we don't need our own #defines in arch code.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/pkeys.h | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index 18ef59a9886d7..5ba80cffb5054 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -15,21 +15,6 @@ DECLARE_STATIC_KEY_TRUE(pkey_disabled);
 extern int pkeys_total; /* total pkeys as per device tree */
 extern u32 initial_allocation_mask; /* bits set for reserved keys */
 
-/*
- * Define these here temporarily so we're not dependent on patching linux/mm.h.
- * Once it's updated we can drop these.
- */
-#ifndef VM_PKEY_BIT0
-# define VM_PKEY_SHIFT	VM_HIGH_ARCH_BIT_0
-# define VM_PKEY_BIT0	VM_HIGH_ARCH_0
-# define VM_PKEY_BIT1	VM_HIGH_ARCH_1
-# define VM_PKEY_BIT2	VM_HIGH_ARCH_2
-# define VM_PKEY_BIT3	VM_HIGH_ARCH_3
-# define VM_PKEY_BIT4	VM_HIGH_ARCH_4
-#elif !defined(VM_PKEY_BIT4)
-# define VM_PKEY_BIT4	VM_HIGH_ARCH_4
-#endif
-
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
 			    VM_PKEY_BIT3 | VM_PKEY_BIT4)
 

From 8f2133cc0e1f9718f3e2d39b7587f4d6c9649a54 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 7 May 2018 23:03:55 +1000
Subject: [PATCH 030/251] powerpc/pseries: hcall_exit tracepoint retval should
 be signed

The hcall_exit() tracepoint has retval defined as unsigned long. That
leads to humours results like:

  bash-3686  [009] d..2   854.134094: hcall_entry: opcode=24
  bash-3686  [009] d..2   854.134095: hcall_exit: opcode=24 retval=18446744073709551609

It's normal for some hcalls to return negative values, displaying them
as unsigned isn't very helpful. So change it to signed.

  bash-3711  [001] d..2   471.691008: hcall_entry: opcode=24
  bash-3711  [001] d..2   471.691008: hcall_exit: opcode=24 retval=-7

Which can be more easily compared to H_NOT_FOUND in hvcall.h

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/include/asm/asm-prototypes.h    | 3 +--
 arch/powerpc/include/asm/trace.h             | 7 +++----
 arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +-
 arch/powerpc/platforms/pseries/lpar.c        | 3 +--
 4 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d9713ad62e3ca..068760d61e7eb 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -36,8 +36,7 @@ void kexec_copy_flush(struct kimage *image);
 /* pseries hcall tracing */
 extern struct static_key hcall_tracepoint_key;
 void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
-void __trace_hcall_exit(long opcode, unsigned long retval,
-			unsigned long *retbuf);
+void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
 /* OPAL tracing */
 #ifdef HAVE_JUMP_LABEL
 extern struct static_key opal_tracepoint_key;
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 33f3b479138bc..d018e86026944 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -81,8 +81,7 @@ TRACE_EVENT_FN_COND(hcall_entry,
 
 TRACE_EVENT_FN_COND(hcall_exit,
 
-	TP_PROTO(unsigned long opcode, unsigned long retval,
-		unsigned long *retbuf),
+	TP_PROTO(unsigned long opcode, long retval, unsigned long *retbuf),
 
 	TP_ARGS(opcode, retval, retbuf),
 
@@ -90,7 +89,7 @@ TRACE_EVENT_FN_COND(hcall_exit,
 
 	TP_STRUCT__entry(
 		__field(unsigned long, opcode)
-		__field(unsigned long, retval)
+		__field(long, retval)
 	),
 
 	TP_fast_assign(
@@ -98,7 +97,7 @@ TRACE_EVENT_FN_COND(hcall_exit,
 		__entry->retval = retval;
 	),
 
-	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+	TP_printk("opcode=%lu retval=%ld", __entry->opcode, __entry->retval),
 
 	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
 );
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 89b7ce807e70c..6da320c786cd7 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -125,7 +125,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long
 	h->purr_start = mfspr(SPRN_PURR);
 }
 
-static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long retval,
+static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
 			     unsigned long *retbuf)
 {
 	struct hcall_stats *h;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index adb996ed51e13..5a392e40f3d29 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -902,8 +902,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 	local_irq_restore(flags);
 }
 
-void __trace_hcall_exit(long opcode, unsigned long retval,
-			unsigned long *retbuf)
+void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 {
 	unsigned long flags;
 	unsigned int *depth;

From 7c18659dd498b25c6651ba83d4267ba7f9458c9c Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Sun, 6 May 2018 13:23:46 +0200
Subject: [PATCH 031/251] powerpc/watchdog: fix typo 'can by' to 'can be'

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 6256dc3b0087d..591f7c3af4ffc 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -64,7 +64,7 @@
  * means the CPU(s) with their bit still set in the pending mask have had
  * their heartbeat stop, and action is taken.
  *
- * Some platforms implement true NMI IPIs, which can by used by the SMP
+ * Some platforms implement true NMI IPIs, which can be used by the SMP
  * watchdog to detect an unresponsive CPU and pull it out of its stuck
  * state with the NMI IPI, to get crash/debug data from it. This way the
  * SMP watchdog can detect hardware interrupts off lockups.

From a84813f5bf2948c2a2947b0d68456204128ae28a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Mon, 7 May 2018 16:20:16 +0200
Subject: [PATCH 032/251] powerpc: wii_defconfig: Disable Ethernet driver
 support code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Wii doesn't have built-in Ethernet and USB Ethernet adapters are in
a different menu. Disable CONFIG_ETHERNET to save some space in support
code for Ethernet drivers.

Note that this patch doesn't disable any Ethernet drivers, because they
are not enabled by default.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/wii_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 0b0f78823a1bf..3167b9d7f3e54 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -49,6 +49,7 @@ CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
+# CONFIG_ETHERNET is not set
 CONFIG_B43=y
 CONFIG_B43_SDIO=y
 # CONFIG_B43_PHY_LP is not set

From d75f98dd24d24fc92ccd84d64080fc1723bcfb17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Mon, 7 May 2018 16:20:17 +0200
Subject: [PATCH 033/251] powerpc: wii_defconfig: Enable GPIO-related options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that there's a GPIO driver for the Wii, let's enable the following
drivers:

- the GPIO driver itself
- gpio-keys
- gpio-poweroff
- gpio-leds and a few LED triggers

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/wii_defconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 3167b9d7f3e54..c5933a38e5adf 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -58,6 +58,7 @@ CONFIG_INPUT_FF_MEMLESS=m
 CONFIG_INPUT_JOYDEV=y
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=y
 # CONFIG_MOUSE_PS2 is not set
 CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_MISC=y
@@ -72,6 +73,9 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_GPIO=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_HLWD=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO=y
 # CONFIG_HWMON is not set
 CONFIG_SSB_DEBUG=y
 CONFIG_FB=y
@@ -89,6 +93,12 @@ CONFIG_HID_APPLE=m
 CONFIG_HID_WACOM=m
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_PANIC=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
 CONFIG_EXT2_FS=y

From 1a6d51f4994c0d02b749848740ce5423482750be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Mon, 7 May 2018 16:20:18 +0200
Subject: [PATCH 034/251] powerpc: wii_defconfig: Enable Wii SDHCI driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows access to the SD card and the BCM4318 Wifi module.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/wii_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index c5933a38e5adf..a674dd420f0fc 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -93,6 +93,8 @@ CONFIG_HID_APPLE=m
 CONFIG_HID_WACOM=m
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_HLWD=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_GPIO=y

From c4c19684fa0aec4a4902b7f12c8b5b597600b41d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Mon, 7 May 2018 16:20:19 +0200
Subject: [PATCH 035/251] powerpc: wii_defconfig: Disable BCMA support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The B43 driver only needs CONFIG_SSB to support the WLAN card found in
the Wii. Configure it accordingly, and disable BCMA bus support to save
a bit of space.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/wii_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index a674dd420f0fc..10940533da716 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -51,6 +51,7 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
 # CONFIG_ETHERNET is not set
 CONFIG_B43=y
+CONFIG_B43_BUSES_SSB=y
 CONFIG_B43_SDIO=y
 # CONFIG_B43_PHY_LP is not set
 CONFIG_B43_DEBUG=y

From 9a018fb1e14704fef39db05730599f42cb4672ac Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 30 Apr 2018 20:31:50 +1000
Subject: [PATCH 036/251] powerpc/config: powernv_defconfig updates

For consideration:

* Add NVDIMM support - Enables greater testing, mambo device.
* Add IPv6 support built in + additional modules - Because it's 2018 maan.
* Add DEFERRED_STRUCT_PAGE_INIT - Let's see what breaks.
* Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware traces.
* Add MEMORY_FAILURE - Machine check exceptions can now drive memory failure.
* Turn on FANOTIFY - This is the current filesystem notification feature.
* Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security risk?
* Turn on async SCSI scanning - Let's see what breaks.
* Add MLX5 driver as a module - Popular demand.
* Add CRYPTO_CRCT10DIF_VPMSUM - POWER8 T10DIF acceleration.

* Make a bunch of USB hid drivers modules.
* Make SCSI SG, SR, and FC modules - FC is huge.
* Make video drivers except AST GPU modules - Also huge.
* Make PCI serial driver a module - Uncommon.
* Make more things modules, NFS FS, RAM disk, netconsole, MS-DOS fs.

* Get rid of /dev/port - Not used.
* Remove PPS and PTP subsystms - Unusual.
* Remove legacy BSD ttys - Long dead.
* Remove IDE - Deprecated and replaced with ATA.
* Remove WIRELESS - Until we get POWER9 laptops.
* Remove RAW - Long deprecated in favour of direct IO.
* Remove floppy, parport, and PS2 input devices - not supported.
* Remove virtio drivers, ballooning - We're host only.
* Remove PPP - Sorry Paulus.

This results in a significantly smaller vmlinux:

    text      data       bss        dec   filename
13143383   5277944   1317856   19739183   vanilla
12263281   4852074   1341720   18457075   patched

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/powernv_defconfig | 108 +++++++++++++++----------
 1 file changed, 65 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 9e92aa6a52bab..6ab34e60495f4 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -38,7 +38,9 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
+CONFIG_SCOM_DEBUGFS=y
 CONFIG_OPAL_PRD=y
+CONFIG_PPC_MEMTRACE=y
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
@@ -54,7 +56,10 @@ CONFIG_NUMA=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
 CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
@@ -72,7 +77,13 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_IPV6_SIT=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
 CONFIG_BRIDGE=m
@@ -81,33 +92,28 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_CLS_BPF=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_BPF=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BPF_JIT=y
+# CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_POWERNV_FLASH=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM=m
 CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
 CONFIG_BLK_DEV_NVME=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
-CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR=m
 CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SG=m
 CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_SCSI_CXGB3_ISCSI=m
 CONFIG_SCSI_CXGB4_ISCSI=m
@@ -121,7 +127,6 @@ CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
 CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_ALUA=m
@@ -152,16 +157,16 @@ CONFIG_DUMMY=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
-CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE=m
 CONFIG_TUN=m
 CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
 CONFIG_VORTEX=m
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 CONFIG_PCNET32=m
 CONFIG_TIGON3=y
 CONFIG_BNX2X=m
+# CONFIG_CAVIUM_PTP is not set
 CONFIG_CHELSIO_T1=m
 CONFIG_BE2NET=m
 CONFIG_S2IO=m
@@ -172,46 +177,62 @@ CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_I40E=m
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_FPGA=y
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_CORE_IPOIB=y
 CONFIG_MYRI10GE=m
 CONFIG_QLGE=m
 CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
+CONFIG_USB_NET_DRIVERS=m
+# CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_MISC=y
-# CONFIG_SERIO_SERPORT is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=m
 CONFIG_SERIAL_JSM=m
-CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IPMI_HANDLER=y
 CONFIG_IPMI_DEVICE_INTERFACE=y
 CONFIG_IPMI_POWERNV=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
+# CONFIG_DEVPORT is not set
 CONFIG_I2C_CHARDEV=y
+# CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_AST=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX=m
 CONFIG_FB_MATROX_MILLENIUM=y
 CONFIG_FB_MATROX_MYSTIQUE=y
 CONFIG_FB_MATROX_G=y
-CONFIG_FB_RADEON=y
-CONFIG_FB_IBM_GXT4500=y
+CONFIG_FB_RADEON=m
+CONFIG_FB_IBM_GXT4500=m
 CONFIG_LCD_PLATFORM=m
+CONFIG_BACKLIGHT_GENERIC=m
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_LOGO=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SUNPLUS=y
+CONFIG_HID_A4TECH=m
+CONFIG_HID_APPLE=m
+CONFIG_HID_BELKIN=m
+CONFIG_HID_CHERRY=m
+CONFIG_HID_CHICONY=m
+CONFIG_HID_CYPRESS=m
+CONFIG_HID_EZKEY=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_ITE=m
+CONFIG_HID_KENSINGTON=m
+CONFIG_HID_LOGITECH=m
+CONFIG_HID_MICROSOFT=m
+CONFIG_HID_MONTEREY=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_USB_HID=m
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_MON=m
@@ -219,6 +240,7 @@ CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
 CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PCI=m
 CONFIG_USB_STORAGE=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
@@ -236,8 +258,9 @@ CONFIG_INFINIBAND_SRP=m
 CONFIG_INFINIBAND_ISER=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
+# CONFIG_VIRTIO_MENU is not set
+CONFIG_LIBNVDIMM=y
+# CONFIG_ND_BLK is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -253,12 +276,13 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
+CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
@@ -270,9 +294,9 @@ CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
 CONFIG_PSTORE=y
-CONFIG_NFS_FS=y
+CONFIG_NFS_FS=m
 CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFSD=m
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
@@ -291,9 +315,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
-CONFIG_FTRACE=y
 CONFIG_FUNCTION_TRACER=y
-CONFIG_FUNCTION_GRAPH_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
@@ -303,10 +325,10 @@ CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
 CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
 CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA1_PPC=m

From d2b04b0c78881ef1c051cc3a4a2c61b7bf91aa53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Tue, 8 May 2018 09:05:14 +0200
Subject: [PATCH 037/251] powerpc/64/kexec: fix race in kexec when XIVE is
 shutdown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kexec_state KEXEC_STATE_IRQS_OFF barrier is reached by all
secondary CPUs before the kexec_cpu_down() operation is called on
secondaries. This can raise conflicts and provoque errors in the XIVE
hcalls when XIVE is shutdown with H_INT_RESET on the primary CPU.

To synchronize the kexec_cpu_down() operations and make sure the
secondaries have completed their task before the primary starts doing
the same, let's move the primary kexec_cpu_down() after the
KEXEC_STATE_REAL_MODE barrier.

This change of the ending sequence of kexec is mostly useful on the
pseries platform but it impacts also the powernv, ps3 and 85xx
platforms. powernv can be easily tested and fixed but some caution is
required for the other two.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/machine_kexec_64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 1044bf15d5eda..a0f6f45005bd4 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -231,16 +231,16 @@ static void kexec_prepare_cpus(void)
 	/* we are sure every CPU has IRQs off at this point */
 	kexec_all_irq_disabled = 1;
 
-	/* after we tell the others to go down */
-	if (ppc_md.kexec_cpu_down)
-		ppc_md.kexec_cpu_down(0, 0);
-
 	/*
 	 * Before removing MMU mappings make sure all CPUs have entered real
 	 * mode:
 	 */
 	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
 
+	/* after we tell the others to go down */
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+
 	put_cpu();
 }
 

From 028555a590d647eb75b975b05dacc35748626469 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Tue, 8 May 2018 09:05:15 +0200
Subject: [PATCH 038/251] powerpc/xive: fix hcall H_INT_RESET to support long
 busy delays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hcall H_INT_RESET can take some time to complete and in such cases
it returns H_LONG_BUSY_* codes requiring the machine to sleep for a
while before retrying.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/xive/spapr.c | 52 +++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 091f1d0d0af19..864de3a733c7f 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
 #include <linux/mm.h>
+#include <linux/delay.h>
 
 #include <asm/prom.h>
 #include <asm/io.h>
@@ -108,6 +109,51 @@ static void xive_irq_bitmap_free(int irq)
 	}
 }
 
+
+/* Based on the similar routines in RTAS */
+static unsigned int plpar_busy_delay_time(long rc)
+{
+	unsigned int ms = 0;
+
+	if (H_IS_LONG_BUSY(rc)) {
+		ms = get_longbusy_msecs(rc);
+	} else if (rc == H_BUSY) {
+		ms = 10; /* seems appropriate for XIVE hcalls */
+	}
+
+	return ms;
+}
+
+static unsigned int plpar_busy_delay(int rc)
+{
+	unsigned int ms;
+
+	ms = plpar_busy_delay_time(rc);
+	if (ms)
+		mdelay(ms);
+
+	return ms;
+}
+
+/*
+ * Note: this call has a partition wide scope and can take a while to
+ * complete. If it returns H_LONG_BUSY_* it should be retried
+ * periodically.
+ */
+static long plpar_int_reset(unsigned long flags)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_INT_RESET, flags);
+	} while (plpar_busy_delay(rc));
+
+	if (rc)
+		pr_err("H_INT_RESET failed %ld\n", rc);
+
+	return rc;
+}
+
 static long plpar_int_get_source_info(unsigned long flags,
 				      unsigned long lisn,
 				      unsigned long *src_flags,
@@ -445,11 +491,7 @@ static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
 
 static void xive_spapr_shutdown(void)
 {
-	long rc;
-
-	rc = plpar_hcall_norets(H_INT_RESET, 0);
-	if (rc)
-		pr_err("H_INT_RESET failed %ld\n", rc);
+	plpar_int_reset(0);
 }
 
 /*

From 72224846fb1f066cc04dc0a38d910221b5719bdf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Tue, 8 May 2018 09:05:16 +0200
Subject: [PATCH 039/251] powerpc/xive: shutdown XIVE when kexec or kdump is
 performed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hcall H_INT_RESET should be called to make sure XIVE is fully
reseted.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/kexec.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 3fe1267969754..46fbaef69a59d 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -57,8 +57,11 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 		}
 	}
 
-	if (xive_enabled())
+	if (xive_enabled()) {
 		xive_kexec_teardown_cpu(secondary);
-	else
+
+		if (!secondary)
+			xive_shutdown();
+	} else
 		xics_kexec_teardown_cpu(secondary);
 }

From 282498d65fb4740c2e9212a79c6888887d0be6cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Tue, 8 May 2018 09:05:17 +0200
Subject: [PATCH 040/251] powerpc/xive: prepare all hcalls to support long busy
 delays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is not the case for the moment, but future releases of pHyp might
need to introduce some synchronisation routines under the hood which
would make the XIVE hcalls longer to complete.

As this was done for H_INT_RESET, let's wrap the other hcalls in a
loop catching the H_LONG_BUSY_* codes.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/xive/spapr.c | 36 +++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 864de3a733c7f..575db3b06a6b8 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -164,7 +164,10 @@ static long plpar_int_get_source_info(unsigned long flags,
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	long rc;
 
-	rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+	do {
+		rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
 	if (rc) {
 		pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
 		return rc;
@@ -197,8 +200,11 @@ static long plpar_int_set_source_config(unsigned long flags,
 		flags, lisn, target, prio, sw_irq);
 
 
-	rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
-				target, prio, sw_irq);
+	do {
+		rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+					target, prio, sw_irq);
+	} while (plpar_busy_delay(rc));
+
 	if (rc) {
 		pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
 		       lisn, target, prio, rc);
@@ -217,7 +223,11 @@ static long plpar_int_get_queue_info(unsigned long flags,
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	long rc;
 
-	rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target, priority);
+	do {
+		rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target,
+				 priority);
+	} while (plpar_busy_delay(rc));
+
 	if (rc) {
 		pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
 		       target, priority, rc);
@@ -246,8 +256,11 @@ static long plpar_int_set_queue_config(unsigned long flags,
 	pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
 		flags,  target, priority, qpage, qsize);
 
-	rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
-				priority, qpage, qsize);
+	do {
+		rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+					priority, qpage, qsize);
+	} while (plpar_busy_delay(rc));
+
 	if (rc) {
 		pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
 		       target, priority, qpage, rc);
@@ -261,7 +274,10 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn)
 {
 	long rc;
 
-	rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+	do {
+		rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
 	if (rc) {
 		pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
 		return  rc;
@@ -284,7 +300,11 @@ static long plpar_int_esb(unsigned long flags,
 	pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n",
 		flags,  lisn, offset, in_data);
 
-	rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, in_data);
+	do {
+		rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset,
+				 in_data);
+	} while (plpar_busy_delay(rc));
+
 	if (rc) {
 		pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n",
 		       lisn, offset, rc);

From 5a951c4e7e8df5d6df52bace1b4ff327885584d6 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 17:25:59 +1000
Subject: [PATCH 041/251] powerpc/watchdog: don't update the watchdog timestamp
 if a lockup is detected

The watchdog heartbeat timestamp is updated when the local heartbeat
timer fires (or touch_nmi_watchdog() is called).

This is an interesting data point, so don't overwrite it when the
soft-NMI interrupt detects a hard lockup. That code came from a pre-
merge version to prevent hard lockup messages flood, but that's taken
care of with the stuck CPU logic now, so there is no reason to
update the heartbeat timestamp here.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 591f7c3af4ffc..b2d3bdff03aa5 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -245,8 +245,6 @@ void soft_nmi_interrupt(struct pt_regs *regs)
 
 	tb = get_tb();
 	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
-		per_cpu(wd_timer_tb, cpu) = tb;
-
 		wd_smp_lock(&flags);
 		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
 			wd_smp_unlock(&flags);

From 4e49226ea8e1810d2c14d9e92a290bad239b512b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 17:26:00 +1000
Subject: [PATCH 042/251] powerpc/watchdog: provide more data in watchdog
 messages

Provide timebase and timebase of last heartbeat in watchdog lockup
messages. Also provide a stack trace of when a CPU becomes un-stuck,
which can be useful -- it could be where irqs are re-enabled, so it
may be the end of the critical section which is responsible for the
latency which is useful information.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b2d3bdff03aa5..1d82274f7e9f7 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -111,7 +111,13 @@ static inline void wd_smp_unlock(unsigned long *flags)
 
 static void wd_lockup_ipi(struct pt_regs *regs)
 {
-	pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id());
+	int cpu = raw_smp_processor_id();
+	u64 tb = get_tb();
+
+	pr_emerg("CPU %d Hard LOCKUP\n", cpu);
+	pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
+		 cpu, tb, per_cpu(wd_timer_tb, cpu),
+		 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
 	print_modules();
 	print_irqtrace_events(current);
 	if (regs)
@@ -154,6 +160,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 
 	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
 		 cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+	pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
+		 cpu, tb, wd_smp_last_reset_tb,
+		 tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000);
 
 	if (!sysctl_hardlockup_all_cpu_backtrace) {
 		/*
@@ -194,10 +203,19 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
 {
 	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
 		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+			struct pt_regs *regs = get_irq_regs();
 			unsigned long flags;
 
-			pr_emerg("CPU %d became unstuck\n", cpu);
 			wd_smp_lock(&flags);
+
+			pr_emerg("CPU %d became unstuck TB:%lld\n",
+				 cpu, tb);
+			print_irqtrace_events(current);
+			if (regs)
+				show_regs(regs);
+			else
+				dump_stack();
+
 			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
 			wd_smp_unlock(&flags);
 		}
@@ -252,7 +270,11 @@ void soft_nmi_interrupt(struct pt_regs *regs)
 		}
 		set_cpu_stuck(cpu, tb);
 
-		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip);
+		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
+			 cpu, (void *)regs->nip);
+		pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
+			 cpu, tb, per_cpu(wd_timer_tb, cpu),
+			 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
 		print_modules();
 		print_irqtrace_events(current);
 		show_regs(regs);

From df78d3f6148092d33a9a24c7a9cfac3d0220b484 Mon Sep 17 00:00:00 2001
From: Torsten Duwe <duwe@lst.de>
Date: Fri, 4 May 2018 14:38:34 +0200
Subject: [PATCH 043/251] powerpc/livepatch: Implement reliable stack tracing
 for the consistency model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "Power Architecture 64-Bit ELF V2 ABI" says in section 2.3.2.3:

[...] There are several rules that must be adhered to in order to ensure
reliable and consistent call chain backtracing:

* Before a function calls any other function, it shall establish its
  own stack frame, whose size shall be a multiple of 16 bytes.

 – In instances where a function’s prologue creates a stack frame, the
   back-chain word of the stack frame shall be updated atomically with
   the value of the stack pointer (r1) when a back chain is implemented.
   (This must be supported as default by all ELF V2 ABI-compliant
   environments.)
[...]
 – The function shall save the link register that contains its return
   address in the LR save doubleword of its caller’s stack frame before
   calling another function.

To me this sounds like the equivalent of HAVE_RELIABLE_STACKTRACE.
This patch may be unneccessarily limited to ppc64le, but OTOH the only
user of this flag so far is livepatching, which is only implemented on
PPCs with 64-LE, a.k.a. ELF ABI v2.

Feel free to add other ppc variants, but so far only ppc64le got tested.

This change also implements save_stack_trace_tsk_reliable() for ppc64le
that checks for the above conditions, where possible.

Signed-off-by: Torsten Duwe <duwe@suse.de>
Signed-off-by: Nicolai Stange <nstange@suse.de>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig             |   1 +
 arch/powerpc/kernel/stacktrace.c | 119 ++++++++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ebb90f09e74fa..23247fa551e72 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -221,6 +221,7 @@ config PPC
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE		if PPC64 && CPU_LITTLE_ENDIAN
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index d534ed9015384..26a50603177c3 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -2,7 +2,7 @@
  * Stack trace utility
  *
  * Copyright 2008 Christoph Hellwig, IBM Corp.
- *
+ * Copyright 2018 SUSE Linux GmbH
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -11,11 +11,16 @@
  */
 
 #include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
+#include <linux/ftrace.h>
+#include <asm/kprobes.h>
 
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
@@ -76,3 +81,115 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 	save_context_stack(trace, regs->gpr[1], current, 0);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_regs);
+
+#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
+int
+save_stack_trace_tsk_reliable(struct task_struct *tsk,
+				struct stack_trace *trace)
+{
+	unsigned long sp;
+	unsigned long stack_page = (unsigned long)task_stack_page(tsk);
+	unsigned long stack_end;
+	int graph_idx = 0;
+
+	/*
+	 * The last frame (unwinding first) may not yet have saved
+	 * its LR onto the stack.
+	 */
+	int firstframe = 1;
+
+	if (tsk == current)
+		sp = current_stack_pointer();
+	else
+		sp = tsk->thread.ksp;
+
+	stack_end = stack_page + THREAD_SIZE;
+	if (!is_idle_task(tsk)) {
+		/*
+		 * For user tasks, this is the SP value loaded on
+		 * kernel entry, see "PACAKSAVE(r13)" in _switch() and
+		 * system_call_common()/EXCEPTION_PROLOG_COMMON().
+		 *
+		 * Likewise for non-swapper kernel threads,
+		 * this also happens to be the top of the stack
+		 * as setup by copy_thread().
+		 *
+		 * Note that stack backlinks are not properly setup by
+		 * copy_thread() and thus, a forked task() will have
+		 * an unreliable stack trace until it's been
+		 * _switch()'ed to for the first time.
+		 */
+		stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+	} else {
+		/*
+		 * idle tasks have a custom stack layout,
+		 * c.f. cpu_idle_thread_init().
+		 */
+		stack_end -= STACK_FRAME_OVERHEAD;
+	}
+
+	if (sp < stack_page + sizeof(struct thread_struct) ||
+	    sp > stack_end - STACK_FRAME_MIN_SIZE) {
+		return 1;
+	}
+
+	for (;;) {
+		unsigned long *stack = (unsigned long *) sp;
+		unsigned long newsp, ip;
+
+		/* sanity check: ABI requires SP to be aligned 16 bytes. */
+		if (sp & 0xF)
+			return 1;
+
+		/* Mark stacktraces with exception frames as unreliable. */
+		if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
+		    stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+			return 1;
+		}
+
+		newsp = stack[0];
+		/* Stack grows downwards; unwinder may only go up. */
+		if (newsp <= sp)
+			return 1;
+
+		if (newsp != stack_end &&
+		    newsp > stack_end - STACK_FRAME_MIN_SIZE) {
+			return 1; /* invalid backlink, too far up. */
+		}
+
+		/* Examine the saved LR: it must point into kernel code. */
+		ip = stack[STACK_FRAME_LR_SAVE];
+		if (!firstframe && !__kernel_text_address(ip))
+			return 1;
+		firstframe = 0;
+
+		/*
+		 * FIXME: IMHO these tests do not belong in
+		 * arch-dependent code, they are generic.
+		 */
+		ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+
+		/*
+		 * Mark stacktraces with kretprobed functions on them
+		 * as unreliable.
+		 */
+		if (ip == (unsigned long)kretprobe_trampoline)
+			return 1;
+
+		if (!trace->skip)
+			trace->entries[trace->nr_entries++] = ip;
+		else
+			trace->skip--;
+
+		if (newsp == stack_end)
+			break;
+
+		if (trace->nr_entries >= trace->max_entries)
+			return -E2BIG;
+
+		sp = newsp;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
+#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */

From 3691d6145585f52a6292c158e72bcde59df8e0a9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2018 23:20:46 +1000
Subject: [PATCH 044/251] powerpc/syscalls: Switch trivial cases to
 SYSCALL_DEFINE

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/pci_32.c               | 6 +++---
 arch/powerpc/kernel/pci_64.c               | 4 ++--
 arch/powerpc/mm/subpage-prot.c             | 4 +++-
 arch/powerpc/platforms/cell/spu_syscalls.c | 3 ++-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 85ad2f78b8896..af36e46c3ed63 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/syscalls.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -283,7 +284,8 @@ pci_bus_to_hose(int bus)
  * Note that the returned IO or memory base is a physical address
  */
 
-long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which,
+		unsigned long, bus, unsigned long, devfn)
 {
 	struct pci_controller* hose;
 	long result = -EOPNOTSUPP;
@@ -307,5 +309,3 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
 
 	return result;
 }
-
-
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 15ce0306b092b..dff28f9035124 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -203,8 +203,8 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose)
 #define IOBASE_ISA_IO		3
 #define IOBASE_ISA_MEM		4
 
-long sys_pciconfig_iobase(long which, unsigned long in_bus,
-			  unsigned long in_devfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus,
+			  unsigned long, in_devfn)
 {
 	struct pci_controller* hose;
 	struct pci_bus *tmp_bus, *bus = NULL;
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index f14a07c2fb903..9d16ee251fc01 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
@@ -185,7 +186,8 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
  * in a 2-bit field won't allow writes to a page that is otherwise
  * write-protected.
  */
-long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
+SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
+		unsigned long, len, u32 __user *, map)
 {
 	struct mm_struct *mm = current->mm;
 	struct subpage_prot_table *spt = &mm->context.spt;
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index 5e6e0bad6db6c..263413a348230 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -26,6 +26,7 @@
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
 #include <linux/binfmts.h>
+#include <linux/syscalls.h>
 
 #include <asm/spu.h>
 
@@ -90,7 +91,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
 	return ret;
 }
 
-asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
+SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus)
 {
 	long ret;
 	struct fd arg;

From f3675644e172301e88354dc7bfca96c124301145 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2018 23:20:47 +1000
Subject: [PATCH 045/251] powerpc/syscalls: signal_{32, 64} - switch to
 SYSCALL_DEFINE

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
[mpe: Fix sys_debug_setcontext() prototype to return long]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/asm-prototypes.h | 14 +++-----
 arch/powerpc/kernel/signal.h              |  6 ++--
 arch/powerpc/kernel/signal_32.c           | 40 ++++++++++++++---------
 arch/powerpc/kernel/signal_64.c           | 15 +++------
 4 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 068760d61e7eb..6f661e3757c8b 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -80,18 +80,12 @@ void machine_check_exception(struct pt_regs *regs);
 void emulation_assist_interrupt(struct pt_regs *regs);
 
 /* signals, syscalls and interrupts */
-#ifdef CONFIG_PPC64
-int sys_swapcontext(struct ucontext __user *old_ctx,
-		    struct ucontext __user *new_ctx,
-		    long ctx_size, long r6, long r7, long r8, struct pt_regs *regs);
-#else
 long sys_swapcontext(struct ucontext __user *old_ctx,
 		    struct ucontext __user *new_ctx,
-		    int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
-int sys_debug_setcontext(struct ucontext __user *ctx,
-			 int ndbg, struct sig_dbg_op __user *dbg,
-			 int r6, int r7, int r8,
-			 struct pt_regs *regs);
+		    long ctx_size);
+#ifdef CONFIG_PPC32
+long sys_debug_setcontext(struct ucontext __user *ctx,
+			  int ndbg, struct sig_dbg_op __user *dbg);
 int
 ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
 unsigned long __init early_init(unsigned long dt_ptr);
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index a6467f843acff..8004336858884 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -49,10 +49,8 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 #else /* CONFIG_PPC64 */
 
-extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
-		     struct pt_regs *regs);
-extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
-		       struct pt_regs *regs);
+extern long sys_rt_sigreturn(void);
+extern long sys_sigreturn(void);
 
 static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 				     struct task_struct *tsk)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 492f03451877e..9cf8a03d3bc75 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -26,8 +26,8 @@
 #include <linux/elf.h>
 #include <linux/ptrace.h>
 #include <linux/ratelimit.h>
-#ifdef CONFIG_PPC64
 #include <linux/syscalls.h>
+#ifdef CONFIG_PPC64
 #include <linux/compat.h>
 #else
 #include <linux/wait.h>
@@ -57,10 +57,6 @@
 
 
 #ifdef CONFIG_PPC64
-#define sys_rt_sigreturn	compat_sys_rt_sigreturn
-#define sys_swapcontext	compat_sys_swapcontext
-#define sys_sigreturn	compat_sys_sigreturn
-
 #define old_sigaction	old_sigaction32
 #define sigcontext	sigcontext32
 #define mcontext	mcontext32
@@ -1041,10 +1037,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 }
 #endif
 
-long sys_swapcontext(struct ucontext __user *old_ctx,
-		     struct ucontext __user *new_ctx,
-		     int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+		       struct ucontext __user *, new_ctx, int, ctx_size)
+#else
+SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+		       struct ucontext __user *, new_ctx, long, ctx_size)
+#endif
 {
+	struct pt_regs *regs = current_pt_regs();
 	unsigned char tmp __maybe_unused;
 	int ctx_has_vsx_region = 0;
 
@@ -1132,10 +1133,14 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 	return 0;
 }
 
-long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
-		     struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+#else
+SYSCALL_DEFINE0(rt_sigreturn)
+#endif
 {
 	struct rt_sigframe __user *rt_sf;
+	struct pt_regs *regs = current_pt_regs();
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	struct ucontext __user *uc_transact;
 	unsigned long msr_hi;
@@ -1224,11 +1229,10 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
 }
 
 #ifdef CONFIG_PPC32
-int sys_debug_setcontext(struct ucontext __user *ctx,
-			 int ndbg, struct sig_dbg_op __user *dbg,
-			 int r6, int r7, int r8,
-			 struct pt_regs *regs)
+SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
+			 int, ndbg, struct sig_dbg_op __user *, dbg)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct sig_dbg_op op;
 	int i;
 	unsigned char tmp __maybe_unused;
@@ -1419,9 +1423,13 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 /*
  * Do a signal return; undo the signal stack.
  */
-long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
-		       struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE0(sigreturn)
+#else
+SYSCALL_DEFINE0(sigreturn)
+#endif
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct sigframe __user *sf;
 	struct sigcontext __user *sc;
 	struct sigcontext sigctx;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 720117690822d..83d51bf586c7e 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -24,6 +24,7 @@
 #include <linux/elf.h>
 #include <linux/ptrace.h>
 #include <linux/ratelimit.h>
+#include <linux/syscalls.h>
 
 #include <asm/sigcontext.h>
 #include <asm/ucontext.h>
@@ -624,17 +625,14 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
 /*
  * Handle {get,set,swap}_context operations
  */
-int sys_swapcontext(struct ucontext __user *old_ctx,
-		    struct ucontext __user *new_ctx,
-		    long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
+SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+		struct ucontext __user *, new_ctx, long, ctx_size)
 {
 	unsigned char tmp;
 	sigset_t set;
 	unsigned long new_msr = 0;
 	int ctx_has_vsx_region = 0;
 
-	BUG_ON(regs != current->thread.regs);
-
 	if (new_ctx &&
 	    get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
 		return -EFAULT;
@@ -698,18 +696,15 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
  * Do a signal return; undo the signal stack.
  */
 
-int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
-		     unsigned long r6, unsigned long r7, unsigned long r8,
-		     struct pt_regs *regs)
+SYSCALL_DEFINE0(rt_sigreturn)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
 	sigset_t set;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	unsigned long msr;
 #endif
 
-	BUG_ON(current->thread.regs != regs);
-
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;
 

From 4c392e6591e3257ebd08210e1ac0a175eefd0168 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2018 23:20:48 +1000
Subject: [PATCH 046/251] powerpc/syscalls: switch rtas(2) to SYSCALL_DEFINE

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
[mpe: Update sys_ni.c for s/ppc_rtas/sys_rtas/]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/syscalls.h         | 2 +-
 arch/powerpc/include/asm/systbl.h           | 2 +-
 arch/powerpc/kernel/rtas.c                  | 3 ++-
 arch/powerpc/kernel/systbl.S                | 1 -
 arch/powerpc/kernel/systbl_chk.c            | 1 -
 arch/powerpc/platforms/cell/spu_callbacks.c | 1 -
 kernel/sys_ni.c                             | 2 +-
 7 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 1b90a3516a351..398171fdcd9fc 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -16,7 +16,7 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len,
 		unsigned long prot, unsigned long flags,
 		unsigned long fd, unsigned long pgoff);
 asmlinkage long ppc64_personality(unsigned long personality);
-asmlinkage int ppc_rtas(struct rtas_args __user *uargs);
+asmlinkage long sys_rtas(struct rtas_args __user *uargs);
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_SYSCALLS_H */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index d61f9c96d9163..b91701c0711a4 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -260,7 +260,7 @@ COMPAT_SYS_SPU(utimes)
 COMPAT_SYS_SPU(statfs64)
 COMPAT_SYS_SPU(fstatfs64)
 SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64)
-PPC_SYS_SPU(rtas)
+SYSCALL_SPU(rtas)
 OLDSYS(debug_setcontext)
 SYSCALL(ni_syscall)
 COMPAT_SYS(migrate_pages)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 3f1c4fcbe0aa5..8afd146bc9c70 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -26,6 +26,7 @@
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/reboot.h>
+#include <linux/syscalls.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -1050,7 +1051,7 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
 }
 
 /* We assume to be passed big endian arguments */
-asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
+SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 {
 	struct rtas_args args;
 	unsigned long flags;
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 7ccb7f81f8db8..c7d5216d91d76 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -35,7 +35,6 @@
 #endif
 #define SYSCALL_SPU(func)	SYSCALL(func)
 #define COMPAT_SYS_SPU(func)	COMPAT_SYS(func)
-#define PPC_SYS_SPU(func)	PPC_SYS(func)
 #define SYSX_SPU(f, f3264, f32)	SYSX(f, f3264, f32)
 
 .section .rodata,"a"
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
index 55323a620cfe2..28476e8116448 100644
--- a/arch/powerpc/kernel/systbl_chk.c
+++ b/arch/powerpc/kernel/systbl_chk.c
@@ -31,7 +31,6 @@
 
 #define SYSCALL_SPU(func)	SYSCALL(func)
 #define COMPAT_SYS_SPU(func)	COMPAT_SYS(func)
-#define PPC_SYS_SPU(func)	PPC_SYS(func)
 #define SYSX_SPU(f, f3264, f32)	SYSX(f, f3264, f32)
 
 /* Just insert a marker for ni_syscalls */
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index a494028b2cdfa..d5bb8c8d769a6 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -44,7 +44,6 @@ static void *spu_syscall_table[] = {
 
 #define SYSCALL_SPU(func)	sys_##func,
 #define COMPAT_SYS_SPU(func)	sys_##func,
-#define PPC_SYS_SPU(func)	ppc_##func,
 #define SYSX_SPU(f, f3264, f32)	f,
 
 #include <asm/systbl.h>
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 9791364925dc1..3751a511e2b82 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -365,7 +365,7 @@ COND_SYSCALL(s390_pci_mmio_write);
 COND_SYSCALL_COMPAT(s390_ipc);
 
 /* powerpc */
-cond_syscall(ppc_rtas);
+COND_SYSCALL(rtas);
 COND_SYSCALL(spu_run);
 COND_SYSCALL(spu_create);
 COND_SYSCALL(subpage_prot);

From 454d7ef81ad2dc3be2bede61f0703f0e69f21dd3 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 2 May 2018 23:20:49 +1000
Subject: [PATCH 047/251] powerpc/syscalls: Add COMPAT_SPU_NEW() macro

Currently the select system call is wired up with the SYSX_SPU()
macro. The SYSX_SPU() is not handled by systbl_chk.c, which means the
syscall number for select is not checked.

That hides the fact that the syscall number for select is actually
__NR__newselect not __NR_select.

In a following patch we'd like to drop ppc32_select() which means
select will become a regular COMPAT_SYS_SPU() syscall. But
COMPAT_SYS_SPU() can't deal with the fact that the syscall number is
actually __NR__newselect. We also can't just redefine __NR_select
because that's still used for the old select call.

So add a new COMPAT_NEW_SPU() that does the same thing as
COMPAT_SYS_SPU() except it encodes that we're using the new number.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/systbl.S                | 1 +
 arch/powerpc/kernel/systbl_chk.c            | 1 +
 arch/powerpc/platforms/cell/spu_callbacks.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index c7d5216d91d76..919a32746ede9 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -35,6 +35,7 @@
 #endif
 #define SYSCALL_SPU(func)	SYSCALL(func)
 #define COMPAT_SYS_SPU(func)	COMPAT_SYS(func)
+#define COMPAT_SPU_NEW(func)	COMPAT_SYS(func)
 #define SYSX_SPU(f, f3264, f32)	SYSX(f, f3264, f32)
 
 .section .rodata,"a"
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
index 28476e8116448..4653258722ac0 100644
--- a/arch/powerpc/kernel/systbl_chk.c
+++ b/arch/powerpc/kernel/systbl_chk.c
@@ -31,6 +31,7 @@
 
 #define SYSCALL_SPU(func)	SYSCALL(func)
 #define COMPAT_SYS_SPU(func)	COMPAT_SYS(func)
+#define COMPAT_SPU_NEW(func)	COMPAT_SYS(_new##func)
 #define SYSX_SPU(f, f3264, f32)	SYSX(f, f3264, f32)
 
 /* Just insert a marker for ni_syscalls */
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index d5bb8c8d769a6..8ae86200ef6c7 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -44,6 +44,7 @@ static void *spu_syscall_table[] = {
 
 #define SYSCALL_SPU(func)	sys_##func,
 #define COMPAT_SYS_SPU(func)	sys_##func,
+#define COMPAT_SPU_NEW(func)	sys_##func,
 #define SYSX_SPU(f, f3264, f32)	f,
 
 #include <asm/systbl.h>

From 28b9c34aa60494c02aa80b5e2cf7210379c3716f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2018 23:20:50 +1000
Subject: [PATCH 048/251] powerpc/syscalls: kill ppc32_select()

it had always been pointless - compat_sys_select() sign-extends
the first argument just fine on its own.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
[mpe: Use COMPAT_SPU_NEW() to keep systbl_chk.sh happy]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/systbl.h | 2 +-
 arch/powerpc/kernel/sys_ppc32.c   | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index b91701c0711a4..cdf528089a632 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -147,7 +147,7 @@ SYSCALL_SPU(setfsuid)
 SYSCALL_SPU(setfsgid)
 SYSCALL_SPU(llseek)
 COMPAT_SYS_SPU(getdents)
-SYSX_SPU(sys_select,ppc32_select,sys_select)
+COMPAT_SPU_NEW(select)
 SYSCALL_SPU(flock)
 SYSCALL_SPU(msync)
 COMPAT_SYS_SPU(readv)
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index c11c733736912..bdf58ba1a94ba 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -52,15 +52,6 @@
 #include <asm/syscalls.h>
 #include <asm/switch_to.h>
 
-
-asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
-		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-		compat_uptr_t tvp_x)
-{
-	/* sign extend n */
-	return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x));
-}
-
 unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)

From 4c1481ae60f85b75633a300fb616999f7d9a113f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2018 23:20:51 +1000
Subject: [PATCH 049/251] powerpc/syscalls: timer_create can be handle by
 perfectly normal COMPAT_SYS_SPU

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/systbl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index cdf528089a632..79a3b47e48394 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -245,7 +245,7 @@ SYSCALL_SPU(epoll_create)
 SYSCALL_SPU(epoll_ctl)
 SYSCALL_SPU(epoll_wait)
 SYSCALL_SPU(remap_file_pages)
-SYSX_SPU(sys_timer_create,compat_sys_timer_create,sys_timer_create)
+COMPAT_SYS_SPU(timer_create)
 COMPAT_SYS_SPU(timer_settime)
 COMPAT_SYS_SPU(timer_gettime)
 SYSCALL_SPU(timer_getoverrun)

From 53da14d0833a663cb98ef111ed91c575f2da9236 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 30 Apr 2018 13:27:36 +1000
Subject: [PATCH 050/251] powerpc: Make it clearer that systbl check errors are
 errors

If the systbl_chk.sh checks fail we print a message, but with no
indication that it's an error. That makes it hard to find in build
logs with eg. grep.

So prefix any output with "Error:".

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/systbl_chk.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
index 31b6e7c358ca9..f2e356c2a345b 100644
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ b/arch/powerpc/kernel/systbl_chk.sh
@@ -16,7 +16,7 @@ awk	'BEGIN { num = -1; }	# Ignore the beginning of the file
 	/^START_TABLE/ { num = 0; next; }
 	/^END_TABLE/ {
 		if (num != $2) {
-			printf "NR_syscalls (%s) is not one more than the last syscall (%s)\n",
+			printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n",
 				$2, num - 1;
 			exit(1);
 		}
@@ -25,7 +25,7 @@ awk	'BEGIN { num = -1; }	# Ignore the beginning of the file
 	{
 		if (num == -1) next;
 		if (($1 != -1) && ($1 != num)) {
-			printf "Syscall %s out of order (expected %s)\n",
+			printf "Error: Syscall %s out of order (expected %s)\n",
 				$1, num;
 			exit(1);
 		};

From 32810d91325ec76b8ef4df463f8a0e9baf353322 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Apr 2018 00:39:02 +1000
Subject: [PATCH 051/251] powerpc/lib: Fix feature fixup test of external
 branch

The expected case for this test was wrong, the source of the alternate
code sequence is:

  FTR_SECTION_ELSE
  2:	or	2,2,2
  	PPC_LCMPI	r3,1
  	beq	3f
  	blt	2b
  	b	3f
  	b	1b
  ALT_FTR_SECTION_END(0, 1)
  3:	or	1,1,1
  	or	2,2,2
  4:	or	3,3,3

So when it's patched the '3' label should still be on the 'or 1,1,1',
and the 4 label is irrelevant and can be removed.

Fixes: 362e7701fd18 ("powerpc: Add self-tests of the feature fixup code")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/feature-fixups-test.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index f4613118132e5..12ff0f673956c 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -167,9 +167,9 @@ globl(ftr_fixup_test6_expected)
 	blt	2b
 	b	3f
 	b	1b
-2:	or	1,1,1
+3:	or	1,1,1
 	or	2,2,2
-3:	or	3,3,3
+	or	3,3,3
 
 
 #if 0

From cad0e39023b43d94d5e38dfd55c103e15bdd093d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Apr 2018 00:39:03 +1000
Subject: [PATCH 052/251] powerpc/lib: Fix the feature fixup tests to actually
 work

The code patching code has always been a bit confused about whether
it's best to use void *, unsigned int *, char *, etc. to point to
instructions. In fact in the feature fixups tests we use both unsigned
int[] and u8[] in different places.

Unfortunately the tests that use unsigned int[] calculate the size of
the code blocks using subtraction of those unsigned int pointers, and
then pass the result to memcmp(). This means we're only comparing 1/4
of the bytes we need to, because we need to multiply by
sizeof(unsigned int) to get the number of *bytes*.

The result is that the tests do all the patching and then only compare
some of the resulting code, so patching bugs that only effect that
last 3/4 of the code could slip through undetected. It turns out that
hasn't been happening, although one test had a bad expected case (see
previous commit).

Fix it for now by multiplying the size by 4 in the affected functions.

Fixes: 362e7701fd18 ("powerpc: Add self-tests of the feature fixup code")
Epic-brown-paper-bag-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/feature-fixups.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 288fe4f0db4ea..097b45bd9de4f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -285,7 +285,7 @@ static void test_basic_patching(void)
 	extern unsigned int end_ftr_fixup_test1[];
 	extern unsigned int ftr_fixup_test1_orig[];
 	extern unsigned int ftr_fixup_test1_expected[];
-	int size = end_ftr_fixup_test1 - ftr_fixup_test1;
+	int size = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1);
 
 	fixup.value = fixup.mask = 8;
 	fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
@@ -317,7 +317,7 @@ static void test_alternative_patching(void)
 	extern unsigned int ftr_fixup_test2_orig[];
 	extern unsigned int ftr_fixup_test2_alt[];
 	extern unsigned int ftr_fixup_test2_expected[];
-	int size = end_ftr_fixup_test2 - ftr_fixup_test2;
+	int size = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2);
 
 	fixup.value = fixup.mask = 0xF;
 	fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
@@ -349,7 +349,7 @@ static void test_alternative_case_too_big(void)
 	extern unsigned int end_ftr_fixup_test3[];
 	extern unsigned int ftr_fixup_test3_orig[];
 	extern unsigned int ftr_fixup_test3_alt[];
-	int size = end_ftr_fixup_test3 - ftr_fixup_test3;
+	int size = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3);
 
 	fixup.value = fixup.mask = 0xC;
 	fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
@@ -376,7 +376,7 @@ static void test_alternative_case_too_small(void)
 	extern unsigned int ftr_fixup_test4_orig[];
 	extern unsigned int ftr_fixup_test4_alt[];
 	extern unsigned int ftr_fixup_test4_expected[];
-	int size = end_ftr_fixup_test4 - ftr_fixup_test4;
+	int size = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4);
 	unsigned long flag;
 
 	/* Check a high-bit flag */
@@ -410,7 +410,7 @@ static void test_alternative_case_with_branch(void)
 	extern unsigned int ftr_fixup_test5[];
 	extern unsigned int end_ftr_fixup_test5[];
 	extern unsigned int ftr_fixup_test5_expected[];
-	int size = end_ftr_fixup_test5 - ftr_fixup_test5;
+	int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5);
 
 	check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
 }
@@ -420,7 +420,7 @@ static void test_alternative_case_with_external_branch(void)
 	extern unsigned int ftr_fixup_test6[];
 	extern unsigned int end_ftr_fixup_test6[];
 	extern unsigned int ftr_fixup_test6_expected[];
-	int size = end_ftr_fixup_test6 - ftr_fixup_test6;
+	int size = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6);
 
 	check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
 }

From b58e7987965028c1afbd95cc0b14be0543b937a0 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Apr 2018 00:39:04 +1000
Subject: [PATCH 053/251] powerpc/lib: Rename ftr_fixup_test7 to
 ftr_fixup_test_too_big

We want this to remain the last test (because it's disabled by
default), so give it a non-numbered name so we don't have to renumber
it when adding new tests before it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/feature-fixups-test.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index 12ff0f673956c..dd05afcbcde35 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -176,7 +176,7 @@ globl(ftr_fixup_test6_expected)
 /* Test that if we have a larger else case the assembler spots it and
  * reports an error. #if 0'ed so as not to break the build normally.
  */
-ftr_fixup_test7:
+ftr_fixup_test_too_big:
 	or	1,1,1
 BEGIN_FTR_SECTION
 	or	2,2,2

From 6158faed7c80256fb6b370781cb3cd6815f4ba69 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 17 Apr 2018 00:39:05 +1000
Subject: [PATCH 054/251] powerpc/lib: Add alt patching test of branching past
 the last instruction

Add a test of the relative branch patching logic in the alternate
section feature fixup code. This tests that if we branch past the last
instruction of the alternate section, the branch is not patched.
That's because the assembler will have created a branch that already
points to the first instruction after the patched section, which is
correct and needs no further patching.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/feature-fixups-test.S | 36 ++++++++++++++++++++++++++
 arch/powerpc/lib/feature-fixups.c      | 11 ++++++++
 2 files changed, 47 insertions(+)

diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index dd05afcbcde35..f16cec9895060 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -171,6 +171,42 @@ globl(ftr_fixup_test6_expected)
 	or	2,2,2
 	or	3,3,3
 
+globl(ftr_fixup_test7)
+	or	1,1,1
+BEGIN_FTR_SECTION
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+FTR_SECTION_ELSE
+2:	b	3f
+3:	or	5,5,5
+	beq	3b
+	b	1f
+	or	6,6,6
+	b	2b
+	bdnz	3b
+1:
+ALT_FTR_SECTION_END(0, 1)
+	or	1,1,1
+	or	1,1,1
+
+globl(end_ftr_fixup_test7)
+	nop
+
+globl(ftr_fixup_test7_expected)
+	or	1,1,1
+2:	b	3f
+3:	or	5,5,5
+	beq	3b
+	b	1f
+	or	6,6,6
+	b	2b
+	bdnz	3b
+1:	or	1,1,1
 
 #if 0
 /* Test that if we have a larger else case the assembler spots it and
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 097b45bd9de4f..f3e46d4edd72d 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -425,6 +425,16 @@ static void test_alternative_case_with_external_branch(void)
 	check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
 }
 
+static void test_alternative_case_with_branch_to_end(void)
+{
+	extern unsigned int ftr_fixup_test7[];
+	extern unsigned int end_ftr_fixup_test7[];
+	extern unsigned int ftr_fixup_test7_expected[];
+	int size = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7);
+
+	check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
+}
+
 static void test_cpu_macros(void)
 {
 	extern u8 ftr_fixup_test_FTR_macros[];
@@ -480,6 +490,7 @@ static int __init test_feature_fixups(void)
 	test_alternative_case_too_small();
 	test_alternative_case_with_branch();
 	test_alternative_case_with_external_branch();
+	test_alternative_case_with_branch_to_end();
 	test_cpu_macros();
 	test_fw_macros();
 	test_lwsync_macros();

From 89c190627257a38d5e4d7cb3e5382f0e6e089f7c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 9 May 2018 23:42:27 +1000
Subject: [PATCH 055/251] powerpc/prom: Drop support for old FDT versions

In commit e6a6928c3ea1 ("of/fdt: Convert FDT functions to use
libfdt") (Apr 2014), the generic flat device tree code dropped support
for flat device tree's older than version 0x10 (16).

We still have code in our CPU scanning to cope with flat device tree
versions earlier than 2, which can now never trigger, so drop it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom.c | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9dbed488aba1a..05e7fb47a7a46 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -332,25 +332,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	 * NOTE: This must match the parsing done in smp_setup_cpu_maps.
 	 */
 	for (i = 0; i < nthreads; i++) {
-		/*
-		 * version 2 of the kexec param format adds the phys cpuid of
-		 * booted proc.
-		 */
-		if (fdt_version(initial_boot_params) >= 2) {
-			if (be32_to_cpu(intserv[i]) ==
-			    fdt_boot_cpuid_phys(initial_boot_params)) {
-				found = boot_cpu_count;
-				found_thread = i;
-			}
-		} else {
-			/*
-			 * Check if it's the boot-cpu, set it's hw index now,
-			 * unfortunately this format did not support booting
-			 * off secondary threads.
-			 */
-			if (of_get_flat_dt_prop(node,
-					"linux,boot-cpu", NULL) != NULL)
-				found = boot_cpu_count;
+		if (be32_to_cpu(intserv[i]) ==
+			fdt_boot_cpuid_phys(initial_boot_params)) {
+			found = boot_cpu_count;
+			found_thread = i;
 		}
 #ifdef CONFIG_SMP
 		/* logical cpu id is always 0 on UP kernels */

From 8ccb442dec0ab53eefb5a607e405f7f3cd02e57f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 10 May 2018 23:09:13 +1000
Subject: [PATCH 056/251] powerpc/powernv: Fix memtrace build when NUMA=n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently memtrace doesn't build if NUMA=n:

  In function ‘memtrace_alloc_node’:
  arch/powerpc/platforms/powernv/memtrace.c:134:6:
  error: the address of ‘contig_page_data’ will always evaluate as ‘true’
    if (!NODE_DATA(nid) || !node_spanned_pages(nid))
        ^

This is because for NUMA=n NODE_DATA(nid) points to an always
allocated structure, contig_page_data.

But even in the NUMA=y case memtrace_alloc_node() is only called for
online nodes, and we should always have a NODE_DATA() allocated for an
online node. So remove the (hopefully) overly paranoid check, which
also means we can build when NUMA=n.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/memtrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index fc222a0c2ac46..b99283df8584d 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -131,7 +131,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
 	u64 start_pfn, end_pfn, nr_pages;
 	u64 base_pfn;
 
-	if (!NODE_DATA(nid) || !node_spanned_pages(nid))
+	if (!node_spanned_pages(nid))
 		return 0;
 
 	start_pfn = node_start_pfn(nid);

From 7ef73cd39b4e2829ee88c06f938bc7de5aff487f Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Mon, 14 May 2018 19:39:22 +1000
Subject: [PATCH 057/251] powerpc/ioda: Use ibm, supported-tce-sizes for IOMMU
 page size mask

At the moment we assume that IODA2 and newer PHBs can always do 4K/64K/16M
IOMMU pages, however this is not the case for POWER9 and now skiboot
advertises the supported sizes via the device so we use that instead
of hard coding the mask.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3f9c69d7623a9..9731098005726 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2910,6 +2910,34 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
 			tbl->it_indirect_levels);
 }
 
+static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	struct device_node *dn = hose->dn;
+	unsigned long mask = 0;
+	int i, rc, count;
+	u32 val;
+
+	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
+	if (count <= 0) {
+		mask = SZ_4K | SZ_64K;
+		/* Add 16M for POWER8 by default */
+		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+				!cpu_has_feature(CPU_FTR_ARCH_300))
+			mask |= SZ_16M;
+		return mask;
+	}
+
+	for (i = 0; i < count; i++) {
+		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
+						i, &val);
+		if (rc == 0)
+			mask |= 1ULL << val;
+	}
+
+	return mask;
+}
+
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe)
 {
@@ -2934,7 +2962,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	pe->table_group.max_dynamic_windows_supported =
 			IOMMU_TABLE_GROUP_MAX_TABLES;
 	pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
-	pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M;
+	pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb);
 #ifdef CONFIG_IOMMU_API
 	pe->table_group.ops = &pnv_pci_ioda2_ops;
 #endif

From c906d2c74e0bb9059fbf43e21f8a962986eb21e2 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 12 May 2018 13:35:24 +1000
Subject: [PATCH 058/251] selftests/powerpc: fix exec benchmark

The exec_target binary could segfault calling _exit(2) because r13
is not set up properly (and libc looks at that when performing a
syscall). Call SYS_exit using syscall(2) which doesn't seem to
have this problem.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/benchmarks/exec_target.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
index 3c9c144192be5..c14b0fc1edde0 100644
--- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -6,8 +6,11 @@
  * Copyright 2018, Anton Blanchard, IBM Corp.
  */
 
-void _exit(int);
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
 void _start(void)
 {
-	_exit(0);
+	syscall(SYS_exit, 0);
 }

From 942cc40ae4354fee1e97137346434a2697d97e02 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 10 May 2018 16:54:39 +0100
Subject: [PATCH 059/251] macintosh/windfarm: fix spelling mistake: "ttarged"
 -> "ttarget"

Trivial fix to spelling mistake in debug messages of a structure
field name

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/macintosh/windfarm_pm121.c | 2 +-
 drivers/macintosh/windfarm_pm81.c  | 2 +-
 drivers/macintosh/windfarm_pm91.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index 4d72d8f58cb63..97634e0309916 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c
@@ -710,7 +710,7 @@ static void pm121_create_cpu_fans(void)
 	wf_cpu_pid_init(&pm121_cpu_state->pid, &pid_param);
 
 	pr_debug("pm121: CPU Fan control initialized.\n");
-	pr_debug("       ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM,\n",
+	pr_debug("       ttarget=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM,\n",
 		 FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax),
 		 pid_param.min, pid_param.max);
 
diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c
index d9ea45581b9e8..346c91744442a 100644
--- a/drivers/macintosh/windfarm_pm81.c
+++ b/drivers/macintosh/windfarm_pm81.c
@@ -455,7 +455,7 @@ static void wf_smu_create_cpu_fans(void)
 	wf_cpu_pid_init(&wf_smu_cpu_fans->pid, &pid_param);
 
 	DBG("wf: CPU Fan control initialized.\n");
-	DBG("    ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n",
+	DBG("    ttarget=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n",
 	    FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax),
 	    pid_param.min, pid_param.max);
 
diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index 7fd73dcb2b0a4..a8a47c62a74bd 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -200,7 +200,7 @@ static void wf_smu_create_cpu_fans(void)
 	wf_cpu_pid_init(&wf_smu_cpu_fans->pid, &pid_param);
 
 	DBG("wf: CPU Fan control initialized.\n");
-	DBG("    ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n",
+	DBG("    ttarget=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n",
 	    FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax),
 	    pid_param.min, pid_param.max);
 

From e807f02c5c364c5a7315b50efdbc102bdd11e953 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Fri, 20 Apr 2018 23:02:39 +0530
Subject: [PATCH 060/251] powerpc/cell/spufs: Change return type to vm_fault_t

Use new return type vm_fault_t for fault handler. For now, this is
just documenting that the function returns a VM_FAULT value rather
than an errno. Once all instances are converted, vm_fault_t will
become a distinct type. See commit 1c8f422059ae ("mm: change return
type to vm_fault_t").

We are fixing a minor bug, that the error from vm_insert_pfn() was
being ignored and the effect of this is likely to be only felt in OOM
situations.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/cell/spufs/file.c | 33 +++++++++++++-----------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 469bdd0b748f7..43e7b93f27c71 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -232,12 +232,13 @@ spufs_mem_write(struct file *file, const char __user *buffer,
 	return size;
 }
 
-static int
+static vm_fault_t
 spufs_mem_mmap_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct spu_context *ctx	= vma->vm_file->private_data;
 	unsigned long pfn, offset;
+	vm_fault_t ret;
 
 	offset = vmf->pgoff << PAGE_SHIFT;
 	if (offset >= LS_SIZE)
@@ -256,11 +257,11 @@ spufs_mem_mmap_fault(struct vm_fault *vmf)
 		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
 		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
 	}
-	vm_insert_pfn(vma, vmf->address, pfn);
+	ret = vmf_insert_pfn(vma, vmf->address, pfn);
 
 	spu_release(ctx);
 
-	return VM_FAULT_NOPAGE;
+	return ret;
 }
 
 static int spufs_mem_mmap_access(struct vm_area_struct *vma,
@@ -312,13 +313,14 @@ static const struct file_operations spufs_mem_fops = {
 	.mmap			= spufs_mem_mmap,
 };
 
-static int spufs_ps_fault(struct vm_fault *vmf,
+static vm_fault_t spufs_ps_fault(struct vm_fault *vmf,
 				    unsigned long ps_offs,
 				    unsigned long ps_size)
 {
 	struct spu_context *ctx = vmf->vma->vm_file->private_data;
 	unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
-	int ret = 0;
+	int err = 0;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
 
 	spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
 
@@ -349,25 +351,26 @@ static int spufs_ps_fault(struct vm_fault *vmf,
 	if (ctx->state == SPU_STATE_SAVED) {
 		up_read(&current->mm->mmap_sem);
 		spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
-		ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		err = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
 		spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
 		down_read(&current->mm->mmap_sem);
 	} else {
 		area = ctx->spu->problem_phys + ps_offs;
-		vm_insert_pfn(vmf->vma, vmf->address, (area + offset) >> PAGE_SHIFT);
+		ret = vmf_insert_pfn(vmf->vma, vmf->address,
+				(area + offset) >> PAGE_SHIFT);
 		spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
 	}
 
-	if (!ret)
+	if (!err)
 		spu_release(ctx);
 
 refault:
 	put_spu_context(ctx);
-	return VM_FAULT_NOPAGE;
+	return ret;
 }
 
 #if SPUFS_MMAP_4K
-static int spufs_cntl_mmap_fault(struct vm_fault *vmf)
+static vm_fault_t spufs_cntl_mmap_fault(struct vm_fault *vmf)
 {
 	return spufs_ps_fault(vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
 }
@@ -1040,7 +1043,7 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
 	return 4;
 }
 
-static int
+static vm_fault_t
 spufs_signal1_mmap_fault(struct vm_fault *vmf)
 {
 #if SPUFS_SIGNAL_MAP_SIZE == 0x1000
@@ -1178,7 +1181,7 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
 }
 
 #if SPUFS_MMAP_4K
-static int
+static vm_fault_t
 spufs_signal2_mmap_fault(struct vm_fault *vmf)
 {
 #if SPUFS_SIGNAL_MAP_SIZE == 0x1000
@@ -1307,7 +1310,7 @@ DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
 		       spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
 
 #if SPUFS_MMAP_4K
-static int
+static vm_fault_t
 spufs_mss_mmap_fault(struct vm_fault *vmf)
 {
 	return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
@@ -1369,7 +1372,7 @@ static const struct file_operations spufs_mss_fops = {
 	.llseek  = no_llseek,
 };
 
-static int
+static vm_fault_t
 spufs_psmap_mmap_fault(struct vm_fault *vmf)
 {
 	return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE);
@@ -1429,7 +1432,7 @@ static const struct file_operations spufs_psmap_fops = {
 
 
 #if SPUFS_MMAP_4K
-static int
+static vm_fault_t
 spufs_mfc_mmap_fault(struct vm_fault *vmf)
 {
 	return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE);

From 21828c99ee91bec94c3d2c32b3d5562ffdea980a Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:15 +0530
Subject: [PATCH 061/251] powerpc/kvm: Switch kvm pmd allocator to custom
 allocator

In the next set of patches, we will switch pmd allocator to use page fragments
and the locking will be updated to split pmd ptlock. We want to avoid using
fragments for partition-scoped table. Use slab cache similar to level 4 table

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 36 ++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index a57eafec4dc2e..ccdf3761eec00 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -200,6 +200,7 @@ void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
 }
 
 static struct kmem_cache *kvm_pte_cache;
+static struct kmem_cache *kvm_pmd_cache;
 
 static pte_t *kvmppc_pte_alloc(void)
 {
@@ -217,6 +218,16 @@ static inline int pmd_is_leaf(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_PTE);
 }
 
+static pmd_t *kvmppc_pmd_alloc(void)
+{
+	return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+}
+
+static void kvmppc_pmd_free(pmd_t *pmdp)
+{
+	kmem_cache_free(kvm_pmd_cache, pmdp);
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -239,7 +250,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	if (pud && pud_present(*pud) && !pud_huge(*pud))
 		pmd = pmd_offset(pud, gpa);
 	else if (level <= 1)
-		new_pmd = pmd_alloc_one(kvm->mm, gpa);
+		new_pmd = kvmppc_pmd_alloc();
 
 	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
 		new_ptep = kvmppc_pte_alloc();
@@ -382,7 +393,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	if (new_pud)
 		pud_free(kvm->mm, new_pud);
 	if (new_pmd)
-		pmd_free(kvm->mm, new_pmd);
+		kvmppc_pmd_free(new_pmd);
 	if (new_ptep)
 		kvmppc_pte_free(new_ptep);
 	return ret;
@@ -758,7 +769,7 @@ void kvmppc_free_radix(struct kvm *kvm)
 				kvmppc_pte_free(pte);
 				pmd_clear(pmd);
 			}
-			pmd_free(kvm->mm, pmd_offset(pud, 0));
+			kvmppc_pmd_free(pmd_offset(pud, 0));
 			pud_clear(pud);
 		}
 		pud_free(kvm->mm, pud_offset(pgd, 0));
@@ -770,20 +781,35 @@ void kvmppc_free_radix(struct kvm *kvm)
 
 static void pte_ctor(void *addr)
 {
-	memset(addr, 0, PTE_TABLE_SIZE);
+	memset(addr, 0, RADIX_PTE_TABLE_SIZE);
+}
+
+static void pmd_ctor(void *addr)
+{
+	memset(addr, 0, RADIX_PMD_TABLE_SIZE);
 }
 
 int kvmppc_radix_init(void)
 {
-	unsigned long size = sizeof(void *) << PTE_INDEX_SIZE;
+	unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
 
 	kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
 	if (!kvm_pte_cache)
 		return -ENOMEM;
+
+	size = sizeof(void *) << RADIX_PMD_INDEX_SIZE;
+
+	kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor);
+	if (!kvm_pmd_cache) {
+		kmem_cache_destroy(kvm_pte_cache);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 void kvmppc_radix_exit(void)
 {
 	kmem_cache_destroy(kvm_pte_cache);
+	kmem_cache_destroy(kvm_pmd_cache);
 }

From 59879d542a8e880391863d82cddf38854e3807f4 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:14 +0530
Subject: [PATCH 062/251] powerpc/mm/book3s64: Move book3s64 code to
 pgtable-book3s64

Only code movement and avoid #ifdef.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-book3s64.c | 54 ++++++++++++++++++++++++++++
 arch/powerpc/mm/pgtable_64.c       | 56 ------------------------------
 2 files changed, 54 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 518518fb7c45a..35913b0b6d56d 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -9,10 +9,13 @@
 
 #include <linux/sched.h>
 #include <linux/mm_types.h>
+#include <linux/memblock.h>
 #include <misc/cxl-base.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
+#include <asm/trace.h>
+#include <asm/powernv.h>
 
 #include "mmu_decl.h"
 #include <trace/events/thp.h>
@@ -171,3 +174,54 @@ int __meminit remove_section_mapping(unsigned long start, unsigned long end)
 	return hash__remove_section_mapping(start, end);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
+
+void __init mmu_partition_table_init(void)
+{
+	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+	unsigned long ptcr;
+
+	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
+	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+						MEMBLOCK_ALLOC_ANYWHERE));
+
+	/* Initialize the Partition Table with no entries */
+	memset((void *)partition_tb, 0, patb_size);
+
+	/*
+	 * update partition table control register,
+	 * 64 K size.
+	 */
+	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
+	mtspr(SPRN_PTCR, ptcr);
+	powernv_set_nmmu_ptcr(ptcr);
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+				   unsigned long dw1)
+{
+	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+
+	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+	partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+	/*
+	 * Global flush of TLBs and partition table caches for this lpid.
+	 * The type of flush (hash or radix) depends on what the previous
+	 * use of this partition ID was, not the new use.
+	 */
+	asm volatile("ptesync" : : : "memory");
+	if (old & PATB_HR) {
+		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+		asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
+	} else {
+		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+	}
+	/* do we need fixup here ?*/
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9bf659d5078c8..a41784dd20425 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -33,7 +33,6 @@
 #include <linux/swap.h>
 #include <linux/stddef.h>
 #include <linux/vmalloc.h>
-#include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
 
@@ -47,13 +46,11 @@
 #include <asm/smp.h>
 #include <asm/machdep.h>
 #include <asm/tlb.h>
-#include <asm/trace.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/firmware.h>
 #include <asm/dma.h>
-#include <asm/powernv.h>
 
 #include "mmu_decl.h"
 
@@ -429,59 +426,6 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 }
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-void __init mmu_partition_table_init(void)
-{
-	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
-	unsigned long ptcr;
-
-	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
-	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
-						MEMBLOCK_ALLOC_ANYWHERE));
-
-	/* Initialize the Partition Table with no entries */
-	memset((void *)partition_tb, 0, patb_size);
-
-	/*
-	 * update partition table control register,
-	 * 64 K size.
-	 */
-	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
-	mtspr(SPRN_PTCR, ptcr);
-	powernv_set_nmmu_ptcr(ptcr);
-}
-
-void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
-				   unsigned long dw1)
-{
-	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
-
-	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
-	partition_tb[lpid].patb1 = cpu_to_be64(dw1);
-
-	/*
-	 * Global flush of TLBs and partition table caches for this lpid.
-	 * The type of flush (hash or radix) depends on what the previous
-	 * use of this partition ID was, not the new use.
-	 */
-	asm volatile("ptesync" : : : "memory");
-	if (old & PATB_HR) {
-		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
-			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
-		asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
-			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
-		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
-	} else {
-		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
-			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
-		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
-	}
-	/* do we need fixup here ?*/
-	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
-EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mark_rodata_ro(void)
 {

From af60a4cf8736ec0889930509431dbc2f45a4e9eb Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:16 +0530
Subject: [PATCH 063/251] powerpc/mm: Use pmd_lockptr instead of opencoding it

In later patch we switch pmd_lock from mm->page_table_lock to split pmd ptlock.
It avoid compilations issues, use pmd_lockptr helper.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-book3s64.c | 4 ++--
 arch/powerpc/mm/pgtable-hash64.c   | 8 +++++---
 arch/powerpc/mm/pgtable-radix.c    | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 35913b0b6d56d..e1c3041831722 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -37,7 +37,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	int changed;
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
-	assert_spin_locked(&vma->vm_mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(vma->vm_mm, pmdp));
 #endif
 	changed = !pmd_same(*(pmdp), entry);
 	if (changed) {
@@ -62,7 +62,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 {
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
-	assert_spin_locked(&mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
 	WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
 #endif
 	trace_hugepage_set_pmd(addr, pmd_val(pmd));
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 199bfda5f0d96..692bfc9e372cb 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -193,7 +193,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr
 
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
-	assert_spin_locked(&mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
 #endif
 
 	__asm__ __volatile__(
@@ -265,7 +265,8 @@ void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 				  pgtable_t pgtable)
 {
 	pgtable_t *pgtable_slot;
-	assert_spin_locked(&mm->page_table_lock);
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
 	/*
 	 * we store the pgtable in the second half of PMD
 	 */
@@ -285,7 +286,8 @@ pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	pgtable_t pgtable;
 	pgtable_t *pgtable_slot;
 
-	assert_spin_locked(&mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
 	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
 	pgtable = *pgtable_slot;
 	/*
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index f1891e215e39e..473415750cbff 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -975,7 +975,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
 
 #ifdef CONFIG_DEBUG_VM
 	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
-	assert_spin_locked(&mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
 #endif
 
 	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);

From 8ce74cffff91b5f84c7c46409c452a3014971698 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:17 +0530
Subject: [PATCH 064/251] powerpc/mm: Rename pte fragment functions

We rename the alloc and get_from_cache to indicate they operate on pte
fragments. In later patch we will add pmd fragment support.

No functional change in this patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable_64.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index a41784dd20425..3873f94a3ae9b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -314,7 +314,7 @@ struct page *pmd_page(pmd_t pmd)
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
-static pte_t *get_from_cache(struct mm_struct *mm)
+static pte_t *get_pte_from_cache(struct mm_struct *mm)
 {
 	void *pte_frag, *ret;
 
@@ -333,7 +333,7 @@ static pte_t *get_from_cache(struct mm_struct *mm)
 	return (pte_t *)ret;
 }
 
-static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
+static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 {
 	void *ret = NULL;
 	struct page *page;
@@ -372,12 +372,13 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel
 {
 	pte_t *pte;
 
-	pte = get_from_cache(mm);
+	pte = get_pte_from_cache(mm);
 	if (pte)
 		return pte;
 
-	return __alloc_for_cache(mm, kernel);
+	return __alloc_for_ptecache(mm, kernel);
 }
+
 #endif /* CONFIG_PPC_64K_PAGES */
 
 void pte_fragment_free(unsigned long *table, int kernel)

From 7820856a4fcda29fcb3f56baaa124ec96def8db5 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:18 +0530
Subject: [PATCH 065/251] powerpc/mm/book3e/64: Remove unsupported 64Kpage size
 from 64bit booke

We have in Kconfig

config PPC_64K_PAGES
	bool "64k page size"
	depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64)
	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64

Only supported BOOK3E 64 bit platforms is FSL_BOOK3E. Remove the dead 64k page
support code from 64bit nohash.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu-book3e.h         |  6 --
 arch/powerpc/include/asm/nohash/64/pgalloc.h  | 60 -------------------
 .../include/asm/nohash/64/pgtable-64k.h       | 57 ------------------
 arch/powerpc/include/asm/nohash/64/pgtable.h  |  8 +--
 4 files changed, 4 insertions(+), 127 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/nohash/64/pgtable-64k.h

diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index cda94a0f5146b..e20072972e359 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -230,10 +230,6 @@ typedef struct {
 	unsigned int	id;
 	unsigned int	active;
 	unsigned long	vdso_base;
-#ifdef CONFIG_PPC_64K_PAGES
-	/* for 4K PTE fragment support */
-	void *pte_frag;
-#endif
 } mm_context_t;
 
 /* Page size definitions, common between 32 and 64-bit
@@ -275,8 +271,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
  */
 #if defined(CONFIG_PPC_4K_PAGES)
 #define mmu_virtual_psize	MMU_PAGE_4K
-#elif defined(CONFIG_PPC_64K_PAGES)
-#define mmu_virtual_psize	MMU_PAGE_64K
 #else
 #error Unsupported page size
 #endif
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 9721c7867b9c5..a6baf3c13bb57 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -52,8 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
 }
 
-#ifndef CONFIG_PPC_64K_PAGES
-
 #define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, (unsigned long)PUD)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -131,64 +129,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
 
-#else /* if CONFIG_PPC_64K_PAGES */
-
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
-extern void pte_fragment_free(unsigned long *, int);
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
-extern void __tlb_remove_table(void *_table);
-#endif
-
-#define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned long)pmd)
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
-{
-	pmd_set(pmd, (unsigned long)pte);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-				pgtable_t pte_page)
-{
-	pmd_set(pmd, (unsigned long)pte_page);
-}
-
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
-	return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
-{
-	return (pte_t *)pte_fragment_alloc(mm, address, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
-{
-	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-	pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
-				  unsigned long address)
-{
-	tlb_flush_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, table, 0);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
-
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
deleted file mode 100644
index 7210c2818e41e..0000000000000
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
-#define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
-
-#define __ARCH_USE_5LEVEL_HACK
-#include <asm-generic/pgtable-nopud.h>
-
-
-#define PTE_INDEX_SIZE  8
-#define PMD_INDEX_SIZE  10
-#define PUD_INDEX_SIZE	0
-#define PGD_INDEX_SIZE  12
-
-/*
- * we support 32 fragments per PTE page of 64K size
- */
-#define PTE_FRAG_NR	32
-/*
- * We use a 2K PTE page fragment and another 2K for storing
- * real_pte_t hash index
- */
-#define PTE_FRAG_SIZE_SHIFT  11
-#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
-
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	PTE_FRAG_SIZE
-#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PUD_TABLE_SIZE	(0)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif	/* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/*
- * Bits to mask out from a PMD to get to the PTE page
- * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
- */
-#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0x1ff
-
-#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
-#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
-
-#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 6ac8381f4c7af..de78eda5f8414 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -6,13 +6,13 @@
  * the ppc64 hashed page table.
  */
 
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/nohash/64/pgtable-64k.h>
-#else
 #include <asm/nohash/64/pgtable-4k.h>
-#endif
 #include <asm/barrier.h>
 
+#ifdef CONFIG_PPC_64K_PAGES
+#error "Page size not supported"
+#endif
+
 #define FIRST_USER_ADDRESS	0UL
 
 /*

From 702346768c68a95be29f5819e8445a3eff30dac0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:19 +0530
Subject: [PATCH 066/251] powerpc/mm/nohash: Remove pte fragment dependency
 from nohash

Now that we have removed 64K page size support, the RCU page table free can
be much simpler for nohash. Make a copy of the the rcu callback to pgalloc.h
header similar to nohash 32. We could possibly merge 32 and 64 bit there. But
that is for a later patch

We also move the book3s specific handler to pgtable_book3s64.c. This will be
updated in a later patch to handle split pmd ptlock.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  57 ++++++++--
 arch/powerpc/mm/pgtable-book3s64.c           | 114 +++++++++++++++++++
 arch/powerpc/mm/pgtable_64.c                 | 114 -------------------
 3 files changed, 159 insertions(+), 126 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index a6baf3c13bb57..21624ff1f065b 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -84,6 +84,18 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+			pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
@@ -118,26 +130,47 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 	__free_page(ptepage);
 }
 
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+static inline void pgtable_free(void *table, int shift)
+{
+	if (!shift) {
+		pgtable_page_dtor(table);
+		free_page((unsigned long)table);
+	} else {
+		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(shift), table);
+	}
+}
+
 #ifdef CONFIG_SMP
-extern void __tlb_remove_table(void *_table);
-#endif
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
-				  unsigned long address)
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
-	tlb_flush_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, page_address(table), 0);
+	unsigned long pgf = (unsigned long)table;
+
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
 }
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline void __tlb_remove_table(void *_table)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-			pgtable_gfp_flags(mm, GFP_KERNEL));
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
-	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+	pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	tlb_flush_pgtable(tlb, address);
+	pgtable_free_tlb(tlb, page_address(table), 0);
 }
 
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index e1c3041831722..e4e1b2d4ca276 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -225,3 +225,117 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+#ifdef CONFIG_PPC_64K_PAGES
+static pte_t *get_pte_from_cache(struct mm_struct *mm)
+{
+	void *pte_frag, *ret;
+
+	spin_lock(&mm->page_table_lock);
+	ret = mm->context.pte_frag;
+	if (ret) {
+		pte_frag = ret + PTE_FRAG_SIZE;
+		/*
+		 * If we have taken up all the fragments mark PTE page NULL
+		 */
+		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
+			pte_frag = NULL;
+		mm->context.pte_frag = pte_frag;
+	}
+	spin_unlock(&mm->page_table_lock);
+	return (pte_t *)ret;
+}
+
+static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
+{
+	void *ret = NULL;
+	struct page *page;
+
+	if (!kernel) {
+		page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+		if (!page)
+			return NULL;
+		if (!pgtable_page_ctor(page)) {
+			__free_page(page);
+			return NULL;
+		}
+	} else {
+		page = alloc_page(PGALLOC_GFP);
+		if (!page)
+			return NULL;
+	}
+
+	ret = page_address(page);
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * If we find pgtable_page set, we return
+	 * the allocated page with single fragement
+	 * count.
+	 */
+	if (likely(!mm->context.pte_frag)) {
+		set_page_count(page, PTE_FRAG_NR);
+		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	return (pte_t *)ret;
+}
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+{
+	pte_t *pte;
+
+	pte = get_pte_from_cache(mm);
+	if (pte)
+		return pte;
+
+	return __alloc_for_ptecache(mm, kernel);
+}
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+void pte_fragment_free(unsigned long *table, int kernel)
+{
+	struct page *page = virt_to_page(table);
+
+	if (put_page_testzero(page)) {
+		if (!kernel)
+			pgtable_page_dtor(page);
+		free_unref_page(page);
+	}
+}
+
+#ifdef CONFIG_SMP
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned int shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	if (!shift)
+		/* PTE page needs special handling */
+		pte_fragment_free(table, 0);
+	else {
+		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(shift), table);
+	}
+}
+#else
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	if (!shift) {
+		/* PTE page needs special handling */
+		pte_fragment_free(table, 0);
+	} else {
+		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(shift), table);
+	}
+}
+#endif
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3873f94a3ae9b..f0208f8f9dbb9 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -313,120 +313,6 @@ struct page *pmd_page(pmd_t pmd)
 	return virt_to_page(pmd_page_vaddr(pmd));
 }
 
-#ifdef CONFIG_PPC_64K_PAGES
-static pte_t *get_pte_from_cache(struct mm_struct *mm)
-{
-	void *pte_frag, *ret;
-
-	spin_lock(&mm->page_table_lock);
-	ret = mm->context.pte_frag;
-	if (ret) {
-		pte_frag = ret + PTE_FRAG_SIZE;
-		/*
-		 * If we have taken up all the fragments mark PTE page NULL
-		 */
-		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
-			pte_frag = NULL;
-		mm->context.pte_frag = pte_frag;
-	}
-	spin_unlock(&mm->page_table_lock);
-	return (pte_t *)ret;
-}
-
-static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
-{
-	void *ret = NULL;
-	struct page *page;
-
-	if (!kernel) {
-		page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
-		if (!page)
-			return NULL;
-		if (!pgtable_page_ctor(page)) {
-			__free_page(page);
-			return NULL;
-		}
-	} else {
-		page = alloc_page(PGALLOC_GFP);
-		if (!page)
-			return NULL;
-	}
-
-	ret = page_address(page);
-	spin_lock(&mm->page_table_lock);
-	/*
-	 * If we find pgtable_page set, we return
-	 * the allocated page with single fragement
-	 * count.
-	 */
-	if (likely(!mm->context.pte_frag)) {
-		set_page_count(page, PTE_FRAG_NR);
-		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
-	}
-	spin_unlock(&mm->page_table_lock);
-
-	return (pte_t *)ret;
-}
-
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
-{
-	pte_t *pte;
-
-	pte = get_pte_from_cache(mm);
-	if (pte)
-		return pte;
-
-	return __alloc_for_ptecache(mm, kernel);
-}
-
-#endif /* CONFIG_PPC_64K_PAGES */
-
-void pte_fragment_free(unsigned long *table, int kernel)
-{
-	struct page *page = virt_to_page(table);
-	if (put_page_testzero(page)) {
-		if (!kernel)
-			pgtable_page_dtor(page);
-		free_unref_page(page);
-	}
-}
-
-#ifdef CONFIG_SMP
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	if (!shift)
-		/* PTE page needs special handling */
-		pte_fragment_free(table, 0);
-	else {
-		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(shift), table);
-	}
-}
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
-	if (!shift) {
-		/* PTE page needs special handling */
-		pte_fragment_free(table, 0);
-	} else {
-		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(shift), table);
-	}
-}
-#endif
-
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mark_rodata_ro(void)
 {

From 1c7ec8a40aa7796a5a996bd985899c93dacaaa35 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:20 +0530
Subject: [PATCH 067/251] powerpc/mm/book3s64/4k: Switch 4k pagesize config to
 use pagetable fragment

4K config use one full page at level 4 of the pagetable. Add support for single
fragment allocation in pagetable fragment code and and use that for 4K config.
This makes both 4k and 64k use the same code path. Later we will switch pmd to
use the page table fragment code. This is done only for 64bit platforms which
is using page table fragment support.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h |  6 +++--
 arch/powerpc/include/asm/book3s/64/mmu.h     |  6 ++---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 26 --------------------
 arch/powerpc/mm/mmu_context_book3s64.c       | 10 --------
 arch/powerpc/mm/pgtable-book3s64.c           | 11 ++++++---
 5 files changed, 15 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 4b5423030d4bb..00c4db2a7682b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -38,8 +38,10 @@
 #define H_PAGE_4K_PFN	0x0
 #define H_PAGE_THP_HUGE 0x0
 #define H_PAGE_COMBO	0x0
-#define H_PTE_FRAG_NR	0
-#define H_PTE_FRAG_SIZE_SHIFT  0
+
+/* 8 bytes per each pte entry */
+#define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3)
+#define H_PTE_FRAG_NR	(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
 
 /* memory key bits, only 8 keys supported */
 #define H_PTE_PKEY_BIT0	0
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5094696eecd6d..fde7803a2261c 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -134,10 +134,10 @@ typedef struct {
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_64K_PAGES
-	/* for 4K PTE fragment support */
+	/*
+	 * pagetable fragment support
+	 */
 	void *pte_frag;
-#endif
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	struct list_head iommu_group_mem_list;
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 558a159600adf..826171568192e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -173,31 +173,6 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 	return (pgtable_t)pmd_page_vaddr(pmd);
 }
 
-#ifdef CONFIG_PPC_4K_PAGES
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long address)
-{
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-				      unsigned long address)
-{
-	struct page *page;
-	pte_t *pte;
-
-	pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
-	if (!pte)
-		return NULL;
-	page = virt_to_page(pte);
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	return pte;
-}
-#else /* if CONFIG_PPC_64K_PAGES */
-
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
@@ -209,7 +184,6 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 {
 	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
 }
-#endif
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index b75194dff64c2..87ee78973a35f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -159,9 +159,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 	mm->context.id = index;
 
-#ifdef CONFIG_PPC_64K_PAGES
 	mm->context.pte_frag = NULL;
-#endif
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(mm);
 #endif
@@ -192,7 +190,6 @@ static void destroy_contexts(mm_context_t *ctx)
 	spin_unlock(&mmu_context_lock);
 }
 
-#ifdef CONFIG_PPC_64K_PAGES
 static void destroy_pagetable_page(struct mm_struct *mm)
 {
 	int count;
@@ -213,13 +210,6 @@ static void destroy_pagetable_page(struct mm_struct *mm)
 	}
 }
 
-#else
-static inline void destroy_pagetable_page(struct mm_struct *mm)
-{
-	return;
-}
-#endif
-
 void destroy_context(struct mm_struct *mm)
 {
 #ifdef CONFIG_SPAPR_TCE_IOMMU
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index e4e1b2d4ca276..fc42cccb96c7c 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -225,7 +225,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
-#ifdef CONFIG_PPC_64K_PAGES
+
 static pte_t *get_pte_from_cache(struct mm_struct *mm)
 {
 	void *pte_frag, *ret;
@@ -264,7 +264,14 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 			return NULL;
 	}
 
+
 	ret = page_address(page);
+	/*
+	 * if we support only one fragment just return the
+	 * allocated page.
+	 */
+	if (PTE_FRAG_NR == 1)
+		return ret;
 	spin_lock(&mm->page_table_lock);
 	/*
 	 * If we find pgtable_page set, we return
@@ -291,8 +298,6 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel
 	return __alloc_for_ptecache(mm, kernel);
 }
 
-#endif /* CONFIG_PPC_64K_PAGES */
-
 void pte_fragment_free(unsigned long *table, int kernel)
 {
 	struct page *page = virt_to_page(table);

From 0c4d268029bf3ca78a57d934bdeb43bfdfd4790e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:21 +0530
Subject: [PATCH 068/251] powerpc/book3s64/mm: Simplify the rcu callback for
 page table free

Instead of encoding shift in the table address, use an enumerated index value.
This allow us to do different things in the callback for pte and pmd.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 10 ++---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++++
 arch/powerpc/mm/pgtable-book3s64.c           | 45 +++++++++++---------
 3 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 826171568192e..ed313b8d3facd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -124,14 +124,14 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 }
 
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
-                                  unsigned long address)
+				  unsigned long address)
 {
 	/*
 	 * By now all the pud entries should be none entries. So go
 	 * ahead and flush the page walk cache
 	 */
 	flush_tlb_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
+	pgtable_free_tlb(tlb, pud, PUD_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -146,14 +146,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 }
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
-                                  unsigned long address)
+				  unsigned long address)
 {
 	/*
 	 * By now all the pud entries should be none entries. So go
 	 * ahead and flush the page walk cache
 	 */
 	flush_tlb_pgtable(tlb, address);
-        return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
+	return pgtable_free_tlb(tlb, pmd, PMD_INDEX);
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@@ -203,7 +203,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 	 * ahead and flush the page walk cache
 	 */
 	flush_tlb_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, table, 0);
+	pgtable_free_tlb(tlb, table, PTE_INDEX);
 }
 
 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 47b5ffc8715d9..9462bc18806c5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -273,6 +273,16 @@ extern unsigned long __pte_frag_size_shift;
 /* Bits to mask out from a PGD to get to the PUD page */
 #define PGD_MASKED_BITS		0xc0000000000000ffUL
 
+/*
+ * Used as an indicator for rcu callback functions
+ */
+enum pgtable_index {
+	PTE_INDEX = 0,
+	PMD_INDEX,
+	PUD_INDEX,
+	PGD_INDEX,
+};
+
 extern unsigned long __vmalloc_start;
 extern unsigned long __vmalloc_end;
 #define VMALLOC_START	__vmalloc_start
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index fc42cccb96c7c..0a05e99b54a1d 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -309,38 +309,45 @@ void pte_fragment_free(unsigned long *table, int kernel)
 	}
 }
 
+static inline void pgtable_free(void *table, int index)
+{
+	switch (index) {
+	case PTE_INDEX:
+		pte_fragment_free(table, 0);
+		break;
+	case PMD_INDEX:
+		kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), table);
+		break;
+	case PUD_INDEX:
+		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
+		break;
+		/* We don't free pgd table via RCU callback */
+	default:
+		BUG();
+	}
+}
+
 #ifdef CONFIG_SMP
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
 {
 	unsigned long pgf = (unsigned long)table;
 
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
+	BUG_ON(index > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= index;
 	tlb_remove_table(tlb, (void *)pgf);
 }
 
 void __tlb_remove_table(void *_table)
 {
 	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned int shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+	unsigned int index = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
 
-	if (!shift)
-		/* PTE page needs special handling */
-		pte_fragment_free(table, 0);
-	else {
-		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(shift), table);
-	}
+	return pgtable_free(table, index);
 }
 #else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
 {
-	if (!shift) {
-		/* PTE page needs special handling */
-		pte_fragment_free(table, 0);
-	} else {
-		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(shift), table);
-	}
+
+	return pgtable_free(table, index);
 }
 #endif

From 8a6c697b993974eeb05f540fb0ff2515346dbefd Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:22 +0530
Subject: [PATCH 069/251] powerpc/mm: Implement helpers for pagetable fragment
 support at PMD level

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  2 +
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  7 ++
 arch/powerpc/include/asm/book3s/64/mmu.h      |  1 +
 arch/powerpc/include/asm/book3s/64/pgalloc.h  |  2 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  6 ++
 arch/powerpc/include/asm/book3s/64/radix-4k.h |  3 +
 .../powerpc/include/asm/book3s/64/radix-64k.h |  4 +
 arch/powerpc/mm/hash_utils_64.c               |  2 +
 arch/powerpc/mm/mmu_context_book3s64.c        | 37 ++++++--
 arch/powerpc/mm/pgtable-book3s64.c            | 84 +++++++++++++++++++
 arch/powerpc/mm/pgtable-radix.c               |  2 +
 11 files changed, 144 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 00c4db2a7682b..9a3798660cefa 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -42,6 +42,8 @@
 /* 8 bytes per each pte entry */
 #define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3)
 #define H_PTE_FRAG_NR	(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
+#define H_PMD_FRAG_SIZE_SHIFT  (H_PMD_INDEX_SIZE + 3)
+#define H_PMD_FRAG_NR	(PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
 
 /* memory key bits, only 8 keys supported */
 #define H_PTE_PKEY_BIT0	0
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index cc82745355b35..c81793d47af9a 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -46,6 +46,13 @@
 #define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3 + 1)
 #define H_PTE_FRAG_NR	(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+#define H_PMD_FRAG_SIZE_SHIFT  (H_PMD_INDEX_SIZE + 3 + 1)
+#else
+#define H_PMD_FRAG_SIZE_SHIFT  (H_PMD_INDEX_SIZE + 3)
+#endif
+#define H_PMD_FRAG_NR	(PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
+
 #ifndef __ASSEMBLY__
 #include <asm/errno.h>
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index fde7803a2261c..9c8c669a6b6a3 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -138,6 +138,7 @@ typedef struct {
 	 * pagetable fragment support
 	 */
 	void *pte_frag;
+	void *pmd_frag;
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	struct list_head iommu_group_mem_list;
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index ed313b8d3facd..005f400cbf308 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,9 @@ extern struct kmem_cache *pgtable_cache[];
 		})
 
 extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pte_fragment_free(unsigned long *, int);
+extern void pmd_fragment_free(unsigned long *);
 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 #ifdef CONFIG_SMP
 extern void __tlb_remove_table(void *_table);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9462bc18806c5..c9db19512b3cb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -246,6 +246,12 @@ extern unsigned long __pte_frag_size_shift;
 #define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 
+extern unsigned long __pmd_frag_nr;
+#define PMD_FRAG_NR __pmd_frag_nr
+extern unsigned long __pmd_frag_size_shift;
+#define PMD_FRAG_SIZE_SHIFT __pmd_frag_size_shift
+#define PMD_FRAG_SIZE (1UL << PMD_FRAG_SIZE_SHIFT)
+
 #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
 #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
 #define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index ca366ec863103..863c3e8286fbf 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -15,4 +15,7 @@
 #define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
 #define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
 
+#define RADIX_PMD_FRAG_SIZE_SHIFT  (RADIX_PMD_INDEX_SIZE + 3)
+#define RADIX_PMD_FRAG_NR	(PAGE_SIZE >> RADIX_PMD_FRAG_SIZE_SHIFT)
+
 #endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index 830082496876d..ccb78ca9d0c54 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -16,4 +16,8 @@
  */
 #define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
 #define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
+
+#define RADIX_PMD_FRAG_SIZE_SHIFT  (RADIX_PMD_INDEX_SIZE + 3)
+#define RADIX_PMD_FRAG_NR	(PAGE_SIZE >> RADIX_PMD_FRAG_SIZE_SHIFT)
+
 #endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5beeec6fbb9be..2fb1c8df05e68 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1012,6 +1012,8 @@ void __init hash__early_init_mmu(void)
 	 */
 	__pte_frag_nr = H_PTE_FRAG_NR;
 	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+	__pmd_frag_nr = H_PMD_FRAG_NR;
+	__pmd_frag_size_shift = H_PMD_FRAG_SIZE_SHIFT;
 
 	__pte_index_size = H_PTE_INDEX_SIZE;
 	__pmd_index_size = H_PMD_INDEX_SIZE;
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 87ee78973a35f..f3d4b4a0e5616 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -160,6 +160,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	mm->context.id = index;
 
 	mm->context.pte_frag = NULL;
+	mm->context.pmd_frag = NULL;
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(mm);
 #endif
@@ -190,16 +191,11 @@ static void destroy_contexts(mm_context_t *ctx)
 	spin_unlock(&mmu_context_lock);
 }
 
-static void destroy_pagetable_page(struct mm_struct *mm)
+static void pte_frag_destroy(void *pte_frag)
 {
 	int count;
-	void *pte_frag;
 	struct page *page;
 
-	pte_frag = mm->context.pte_frag;
-	if (!pte_frag)
-		return;
-
 	page = virt_to_page(pte_frag);
 	/* drop all the pending references */
 	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
@@ -210,6 +206,35 @@ static void destroy_pagetable_page(struct mm_struct *mm)
 	}
 }
 
+static void pmd_frag_destroy(void *pmd_frag)
+{
+	int count;
+	struct page *page;
+
+	page = virt_to_page(pmd_frag);
+	/* drop all the pending references */
+	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
+	/* We allow PTE_FRAG_NR fragments from a PTE page */
+	if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) {
+		pgtable_pmd_page_dtor(page);
+		free_unref_page(page);
+	}
+}
+
+static void destroy_pagetable_page(struct mm_struct *mm)
+{
+	void *frag;
+
+	frag = mm->context.pte_frag;
+	if (frag)
+		pte_frag_destroy(frag);
+
+	frag = mm->context.pmd_frag;
+	if (frag)
+		pmd_frag_destroy(frag);
+	return;
+}
+
 void destroy_context(struct mm_struct *mm)
 {
 #ifdef CONFIG_SPAPR_TCE_IOMMU
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 0a05e99b54a1d..47323ed8d7b54 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -20,6 +20,11 @@
 #include "mmu_decl.h"
 #include <trace/events/thp.h>
 
+unsigned long __pmd_frag_nr;
+EXPORT_SYMBOL(__pmd_frag_nr);
+unsigned long __pmd_frag_size_shift;
+EXPORT_SYMBOL(__pmd_frag_size_shift);
+
 int (*register_process_table)(unsigned long base, unsigned long page_size,
 			      unsigned long tbl_size);
 
@@ -226,6 +231,85 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
 }
 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
 
+static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
+{
+	void *pmd_frag, *ret;
+
+	spin_lock(&mm->page_table_lock);
+	ret = mm->context.pmd_frag;
+	if (ret) {
+		pmd_frag = ret + PMD_FRAG_SIZE;
+		/*
+		 * If we have taken up all the fragments mark PTE page NULL
+		 */
+		if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0)
+			pmd_frag = NULL;
+		mm->context.pmd_frag = pmd_frag;
+	}
+	spin_unlock(&mm->page_table_lock);
+	return (pmd_t *)ret;
+}
+
+static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
+{
+	void *ret = NULL;
+	struct page *page;
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+
+	if (mm == &init_mm)
+		gfp &= ~__GFP_ACCOUNT;
+	page = alloc_page(gfp);
+	if (!page)
+		return NULL;
+	if (!pgtable_pmd_page_ctor(page)) {
+		__free_pages(page, 0);
+		return NULL;
+	}
+
+	ret = page_address(page);
+	/*
+	 * if we support only one fragment just return the
+	 * allocated page.
+	 */
+	if (PMD_FRAG_NR == 1)
+		return ret;
+
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * If we find pgtable_page set, we return
+	 * the allocated page with single fragement
+	 * count.
+	 */
+	if (likely(!mm->context.pmd_frag)) {
+		set_page_count(page, PMD_FRAG_NR);
+		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	return (pmd_t *)ret;
+}
+
+pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
+{
+	pmd_t *pmd;
+
+	pmd = get_pmd_from_cache(mm);
+	if (pmd)
+		return pmd;
+
+	return __alloc_for_pmdcache(mm);
+}
+
+void pmd_fragment_free(unsigned long *pmd)
+{
+	struct page *page = virt_to_page(pmd);
+
+	if (put_page_testzero(page)) {
+		pgtable_pmd_page_dtor(page);
+		free_unref_page(page);
+	}
+}
+
 static pte_t *get_pte_from_cache(struct mm_struct *mm)
 {
 	void *pte_frag, *ret;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 473415750cbff..32e58024e7cbc 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -640,6 +640,8 @@ void __init radix__early_init_mmu(void)
 #endif
 	__pte_frag_nr = RADIX_PTE_FRAG_NR;
 	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
+	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
+	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
 
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		radix_init_native();

From 738f9645550ea6887f119e44dc479821e44571bd Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:23 +0530
Subject: [PATCH 070/251] powerpc/mm: Use page fragments for allocation page
 table at PMD level

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 10 ----------
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  8 +++-----
 arch/powerpc/include/asm/book3s/64/pgtable.h |  4 ++--
 arch/powerpc/mm/hash_utils_64.c              |  1 -
 arch/powerpc/mm/pgtable-book3s64.c           |  3 +--
 arch/powerpc/mm/pgtable-radix.c              |  1 -
 arch/powerpc/mm/pgtable_64.c                 |  2 --
 7 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index cc8cd656ccfe9..0387b155f13de 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,16 +23,6 @@
 				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
-	defined(CONFIG_PPC_64K_PAGES)
-/*
- * only with hash 64k we need to use the second half of pmd page table
- * to store pointer to deposited pgtable_t
- */
-#define H_PMD_CACHE_INDEX	(H_PMD_INDEX_SIZE + 1)
-#else
-#define H_PMD_CACHE_INDEX	H_PMD_INDEX_SIZE
-#endif
 /*
  * We store the slot details in the second half of page table.
  * Increase the pud level table so that hugetlb ptes can be stored
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 005f400cbf308..01ee40f11f3a1 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -90,8 +90,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	 * need to do this for 4k.
 	 */
 #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
-	((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) ||		     \
-	 (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX))
+	(H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX)
 	memset(pgd, 0, PGD_TABLE_SIZE);
 #endif
 	return pgd;
@@ -138,13 +137,12 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-		pgtable_gfp_flags(mm, GFP_KERNEL));
+	return pmd_fragment_alloc(mm, addr);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+	pmd_fragment_free((unsigned long *)pmd);
 }
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c9db19512b3cb..c233915abb68e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -212,13 +212,13 @@ extern unsigned long __pte_index_size;
 extern unsigned long __pmd_index_size;
 extern unsigned long __pud_index_size;
 extern unsigned long __pgd_index_size;
-extern unsigned long __pmd_cache_index;
 extern unsigned long __pud_cache_index;
 #define PTE_INDEX_SIZE  __pte_index_size
 #define PMD_INDEX_SIZE  __pmd_index_size
 #define PUD_INDEX_SIZE  __pud_index_size
 #define PGD_INDEX_SIZE  __pgd_index_size
-#define PMD_CACHE_INDEX __pmd_cache_index
+/* pmd table use page table fragments */
+#define PMD_CACHE_INDEX  0
 #define PUD_CACHE_INDEX __pud_cache_index
 /*
  * Because of use of pte fragments and THP, size of page table
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2fb1c8df05e68..3dc3dd3ea0b68 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1020,7 +1020,6 @@ void __init hash__early_init_mmu(void)
 	__pud_index_size = H_PUD_INDEX_SIZE;
 	__pgd_index_size = H_PGD_INDEX_SIZE;
 	__pud_cache_index = H_PUD_CACHE_INDEX;
-	__pmd_cache_index = H_PMD_CACHE_INDEX;
 	__pte_table_size = H_PTE_TABLE_SIZE;
 	__pmd_table_size = H_PMD_TABLE_SIZE;
 	__pud_table_size = H_PUD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 47323ed8d7b54..abda2b92f1baa 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -400,7 +400,7 @@ static inline void pgtable_free(void *table, int index)
 		pte_fragment_free(table, 0);
 		break;
 	case PMD_INDEX:
-		kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), table);
+		pmd_fragment_free(table);
 		break;
 	case PUD_INDEX:
 		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
@@ -431,7 +431,6 @@ void __tlb_remove_table(void *_table)
 #else
 void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
 {
-
 	return pgtable_free(table, index);
 }
 #endif
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 32e58024e7cbc..ce24d72ea6799 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -617,7 +617,6 @@ void __init radix__early_init_mmu(void)
 	__pud_index_size = RADIX_PUD_INDEX_SIZE;
 	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
 	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
-	__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
 	__pte_table_size = RADIX_PTE_TABLE_SIZE;
 	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
 	__pud_table_size = RADIX_PUD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f0208f8f9dbb9..53e9eeecd5d44 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -72,8 +72,6 @@ unsigned long __pud_index_size;
 EXPORT_SYMBOL(__pud_index_size);
 unsigned long __pgd_index_size;
 EXPORT_SYMBOL(__pgd_index_size);
-unsigned long __pmd_cache_index;
-EXPORT_SYMBOL(__pmd_cache_index);
 unsigned long __pud_cache_index;
 EXPORT_SYMBOL(__pud_cache_index);
 unsigned long __pte_table_size;

From 675d995297d42f69484100516cd30a71d25f4c7c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 16 Apr 2018 16:57:24 +0530
Subject: [PATCH 071/251] powerpc/book3s64: Enable split pmd ptlock.

Testing with a threaded version of mmap_bench which allocate 1G chunks and
with large number of threads we find:

without patch

    32.72%  mmap_bench  [kernel.vmlinux]            [k] do_raw_spin_lock
            |
            ---do_raw_spin_lock
               |
                --32.68%--0
                          |
                          |--15.82%--pte_fragment_alloc
                          |          |
                          |           --15.79%--do_huge_pmd_anonymous_page
                          |                     __handle_mm_fault
                          |                     handle_mm_fault
                          |                     __do_page_fault
                          |                     handle_page_fault
                          |                     test_mmap
                          |                     test_mmap
                          |                     start_thread
                          |                     __clone
                          |
                          |--14.95%--do_huge_pmd_anonymous_page
                          |          __handle_mm_fault
                          |          handle_mm_fault
                          |          __do_page_fault
                          |          handle_page_fault
                          |          test_mmap
                          |          test_mmap
                          |          start_thread
                          |          __clone
                          |

with patch

    12.89%  mmap_bench  [kernel.vmlinux]            [k] do_raw_spin_lock
            |
            ---do_raw_spin_lock
               |
                --12.83%--0
                          |
                          |--3.21%--pagevec_lru_move_fn
                          |          __lru_cache_add
                          |          |
                          |           --2.74%--do_huge_pmd_anonymous_page
                          |                     __handle_mm_fault
                          |                     handle_mm_fault
                          |                     __do_page_fault
                          |                     handle_page_fault
                          |                     test_mmap
                          |                     test_mmap
                          |                     start_thread
                          |                     __clone
                          |
                          |--3.11%--do_huge_pmd_anonymous_page
                          |          __handle_mm_fault
                          |          handle_mm_fault
                          |          __do_page_fault
                          |          handle_page_fault
                          |          test_mmap
                          |          test_mmap
                          |          start_thread
                          |          __clone

.....
                          |
                           --0.55%--pte_fragment_alloc
                                     |
                                      --0.55%--do_huge_pmd_anonymous_page
                                                __handle_mm_fault
                                                handle_mm_fault
                                                __do_page_fault
                                                handle_page_fault
                                                test_mmap
                                                test_mmap
                                                start_thread
                                                __clone

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/Kconfig.cputype | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 67d3125d06100..cc892dcfa1145 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -292,6 +292,10 @@ config PPC_STD_MMU_32
 	def_bool y
 	depends on PPC_STD_MMU && PPC32
 
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+	def_bool y
+	depends on PPC_BOOK3S_64
+
 config PPC_RADIX_MMU
 	bool "Radix MMU Support"
 	depends on PPC_BOOK3S_64

From c068e6b8caa0c796535cb12f64767cc521b20508 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Thu, 10 May 2018 23:59:18 +0200
Subject: [PATCH 072/251] powerpc/embedded6xx/flipper-pic: Don't match all IRQ
 domains
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On the Wii, there is a secondary IRQ controller (hlwd-pic), so
flipper-pic's match operation should not be hardcoded to return 1.
In fact, the default matching logic is sufficient, and we can completely
omit flipper_pic_match.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/embedded6xx/flipper-pic.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 7206f3f573d45..db0be007fd063 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -108,16 +108,8 @@ static int flipper_pic_map(struct irq_domain *h, unsigned int virq,
 	return 0;
 }
 
-static int flipper_pic_match(struct irq_domain *h, struct device_node *np,
-			     enum irq_domain_bus_token bus_token)
-{
-	return 1;
-}
-
-
 static const struct irq_domain_ops flipper_irq_domain_ops = {
 	.map = flipper_pic_map,
-	.match = flipper_pic_match,
 };
 
 /*

From 9dcb3df4281876731e4e8bff7940514d72375154 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Thu, 10 May 2018 23:59:19 +0200
Subject: [PATCH 073/251] powerpc/embedded6xx/hlwd-pic: Prevent interrupts from
 being handled by Starlet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The interrupt controller inside the Wii's Hollywood chip is connected to
two masters, the "Broadway" PowerPC and the "Starlet" ARM926, each with
their own interrupt status and mask registers.

When booting the Wii with mini[1], interrupts from the SD card
controller (IRQ 7) are handled by the ARM, because mini provides SD
access over IPC. Linux however can't currently use or disable this IPC
service, so both sides try to handle IRQ 7 without coordination.

Let's instead make sure that all interrupts that are unmasked on the PPC
side are masked on the ARM side; this will also make sure that Linux can
properly talk to the SD card controller (and potentially other devices).

If access to a device through IPC is desired in the future, interrupts
from that device should not be handled by Linux directly.

[1]: https://github.com/lewurm/mini

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 89c54de88b7a0..bf4a125faec66 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -35,6 +35,8 @@
  */
 #define HW_BROADWAY_ICR		0x00
 #define HW_BROADWAY_IMR		0x04
+#define HW_STARLET_ICR		0x08
+#define HW_STARLET_IMR		0x0c
 
 
 /*
@@ -74,6 +76,9 @@ static void hlwd_pic_unmask(struct irq_data *d)
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
+
+	/* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */
+	clrbits32(io_base + HW_STARLET_IMR, 1 << irq);
 }
 
 

From 0078778a86b14f85bf50e96d9ddeb3b70b55805d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 May 2018 12:20:18 +1000
Subject: [PATCH 074/251] powerpc/mm/radix: implement LPID based TLB flushes to
 be used by KVM

Implement a local TLB flush for invalidating an LPID with variants for
process or partition scope. And a global TLB flush for invalidating
a partition scoped page of an LPID.

These will be used by KVM in subsequent patches.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../include/asm/book3s/64/tlbflush-radix.h    |   7 +
 arch/powerpc/mm/tlb-radix.c                   | 207 ++++++++++++++++++
 2 files changed, 214 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 19b45ba6caf91..ef5c3f2994c93 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -51,4 +51,11 @@ extern void radix__flush_tlb_all(void);
 extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
 					unsigned long address);
 
+extern void radix__flush_tlb_lpid_page(unsigned int lpid,
+					unsigned long addr,
+					unsigned long page_size);
+extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__local_flush_tlb_lpid(unsigned int lpid);
+extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
+
 #endif
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a5d7309c2d055..5ac3206c51ccf 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -118,6 +118,53 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void __tlbiel_lpid(unsigned long lpid, int set,
+				unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = 0;  /* LPID comes from LPIDR */
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+				unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = 0;  /* LPID comes from LPIDR */
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+}
+
+
 static inline void __tlbiel_va(unsigned long va, unsigned long pid,
 			       unsigned long ap, unsigned long ric)
 {
@@ -150,6 +197,22 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+			      unsigned long ap, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
 static inline void fixup_tlbie(void)
 {
 	unsigned long pid = 0;
@@ -161,6 +224,16 @@ static inline void fixup_tlbie(void)
 	}
 }
 
+static inline void fixup_tlbie_lpid(unsigned long lpid)
+{
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+	}
+}
+
 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
  */
@@ -214,6 +287,86 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
+{
+	int set;
+
+	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
+	 * also flush the entire Page Walk Cache.
+	 */
+	__tlbiel_lpid(lpid, 0, ric);
+
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
+
+	/* For the remaining sets, just flush the TLB */
+	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
+		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_pid
+	 * must be a compile-time contraint to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+	}
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+	int set;
+
+	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
+	 * also flush the entire Page Walk Cache.
+	 */
+	__tlbiel_lpid_guest(lpid, 0, ric);
+
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
+
+	/* For the remaining sets, just flush the TLB */
+	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
+		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
+
+	asm volatile("ptesync": : :"memory");
+}
+
+
 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
 				    unsigned long pid, unsigned long page_size,
 				    unsigned long psize)
@@ -268,6 +421,17 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+			      unsigned long psize, unsigned long ric)
+{
+	unsigned long ap = mmu_get_ap(psize);
+
+	asm volatile("ptesync": : :"memory");
+	__tlbie_lpid_va(va, lpid, ap, ric);
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 				    unsigned long pid, unsigned long page_size,
 				    unsigned long psize, bool also_pwc)
@@ -534,6 +698,49 @@ static int radix_get_mmu_psize(int page_size)
 	return psize;
 }
 
+/*
+ * Flush partition scoped LPID address translation for all CPUs.
+ */
+void radix__flush_tlb_lpid_page(unsigned int lpid,
+					unsigned long addr,
+					unsigned long page_size)
+{
+	int psize = radix_get_mmu_psize(page_size);
+
+	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
+
+/*
+ * Flush partition scoped PWC from LPID for all CPUs.
+ */
+void radix__flush_pwc_lpid(unsigned int lpid)
+{
+	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
+}
+EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR)
+ */
+void radix__local_flush_tlb_lpid(unsigned int lpid)
+{
+	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
+
+/*
+ * Flush process scoped translations from LPID (=LPIDR).
+ * Important difference, the guest normally manages its own translations,
+ * but some cases e.g., vCPU CPU migration require KVM to flush.
+ */
+void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+{
+	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
+
+
 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long end, int psize);
 

From 9f9eae5ce717f497812dfc1bda5219bc589b455d Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 28 Mar 2018 21:58:11 +0200
Subject: [PATCH 075/251] powerpc/kvm: Prefer fault_in_pages_readable function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Directly use fault_in_pages_readable instead of manual __get_user code. Fix
warning treated as error with W=1:

  arch/powerpc/kernel/kvm.c:675:6: error: variable ‘tmp’ set but not used [-Werror=unused-but-set-variable]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/kvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 9ad37f827a975..683b5b3805bd1 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,6 +25,7 @@
 #include <linux/kvm_para.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/pagemap.h>
 
 #include <asm/reg.h>
 #include <asm/sections.h>
@@ -672,14 +673,13 @@ static void kvm_use_magic_page(void)
 {
 	u32 *p;
 	u32 *start, *end;
-	u32 tmp;
 	u32 features;
 
 	/* Tell the host to map the magic page to -4096 on all CPUs */
 	on_each_cpu(kvm_map_magic_page, &features, 1);
 
 	/* Quick self-test to see if the mapping works */
-	if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
+	if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
 		kvm_patching_worked = false;
 		return;
 	}

From 5279821a6f5ff75d7dce632e49b9fbca5a2b98da Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 16 May 2018 08:58:57 +0200
Subject: [PATCH 076/251] powerpc: get rid of PMD_PAGE_SIZE() and _PMD_SIZE

PMD_PAGE_SIZE() is nowhere used and _PMD_SIZE is only
used by PMD_PAGE_SIZE().

This patch removes them.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/32/pte-40x.h | 3 ---
 arch/powerpc/include/asm/pte-common.h        | 8 --------
 2 files changed, 11 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h
index 124f9ac23a1e6..bb4b3a4b92a07 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -52,12 +52,9 @@
 
 #define _PMD_PRESENT	0x400	/* PMD points to page of PTEs */
 #define _PMD_BAD	0x802
-#define _PMD_SIZE	0x0e0	/* size field, != 0 for large-page PMD entry */
 #define _PMD_SIZE_4M	0x0c0
 #define _PMD_SIZE_16M	0x0e0
 
-#define PMD_PAGE_SIZE(pmdval)	(1024 << (((pmdval) & _PMD_SIZE) >> 4))
-
 /* Until my rework is finished, 40x still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES	1
 
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index c4a72c7a8c831..050b0d7753245 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -60,10 +60,6 @@
 #ifndef _PMD_PRESENT_MASK
 #define _PMD_PRESENT_MASK	_PMD_PRESENT
 #endif
-#ifndef _PMD_SIZE
-#define _PMD_SIZE	0
-#define PMD_PAGE_SIZE(pmd)	bad_call_to_PMD_PAGE_SIZE()
-#endif
 #ifndef _PMD_USER
 #define _PMD_USER	0
 #endif
@@ -88,11 +84,7 @@
 #define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 #endif
 
-/* Make sure we get a link error if PMD_PAGE_SIZE is ever called on a
- * kernel without large page PMD support
- */
 #ifndef __ASSEMBLY__
-extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
 
 /*
  * Don't just check for any non zero bits in __PAGE_USER, since for book3e

From 96f391cf40ee5c9201cc7b55abe3903761e6a2e2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 10 Apr 2018 08:34:37 +0200
Subject: [PATCH 077/251] Revert "powerpc/64: Fix checksum folding in
 csum_add()"

This reverts commit 6ad966d7303b70165228dba1ee8da1a05c10eefe.

That commit was pointless, because csum_add() sums two 32 bits
values, so the sum is 0x1fffffffe at the maximum.
And then when adding upper part (1) and lower part (0xfffffffe),
the result is 0xffffffff which doesn't carry.
Any lower value will not carry either.

And behind the fact that this commit is useless, it also kills the
whole purpose of having an arch specific inline csum_add()
because the resulting code gets even worse than what is obtained
with the generic implementation of csum_add()

0000000000000240 <.csum_add>:
 240:	38 00 ff ff 	li      r0,-1
 244:	7c 84 1a 14 	add     r4,r4,r3
 248:	78 00 00 20 	clrldi  r0,r0,32
 24c:	78 89 00 22 	rldicl  r9,r4,32,32
 250:	7c 80 00 38 	and     r0,r4,r0
 254:	7c 09 02 14 	add     r0,r9,r0
 258:	78 09 00 22 	rldicl  r9,r0,32,32
 25c:	7c 00 4a 14 	add     r0,r0,r9
 260:	78 03 00 20 	clrldi  r3,r0,32
 264:	4e 80 00 20 	blr

In comparison, the generic implementation of csum_add() gives:

0000000000000290 <.csum_add>:
 290:	7c 63 22 14 	add     r3,r3,r4
 294:	7f 83 20 40 	cmplw   cr7,r3,r4
 298:	7c 10 10 26 	mfocrf  r0,1
 29c:	54 00 ef fe 	rlwinm  r0,r0,29,31,31
 2a0:	7c 60 1a 14 	add     r3,r0,r3
 2a4:	78 63 00 20 	clrldi  r3,r3,32
 2a8:	4e 80 00 20 	blr

And the reverted implementation for PPC64 gives:

0000000000000240 <.csum_add>:
 240:	7c 84 1a 14 	add     r4,r4,r3
 244:	78 80 00 22 	rldicl  r0,r4,32,32
 248:	7c 80 22 14 	add     r4,r0,r4
 24c:	78 83 00 20 	clrldi  r3,r4,32
 250:	4e 80 00 20 	blr

Fixes: 6ad966d7303b7 ("powerpc/64: Fix checksum folding in csum_add()")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/checksum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 842124b199b58..4e63787dc3bec 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
 
 #ifdef __powerpc64__
 	res += (__force u64)addend;
-	return (__force __wsum) from64to32(res);
+	return (__force __wsum)((u32)res + (res >> 32));
 #else
 	asm("addc %0,%0,%1;"
 	    "addze %0,%0;"

From 24c78586cc6798028205e12c34febf0337bcbded Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 17 Apr 2018 19:08:16 +0200
Subject: [PATCH 078/251] powerpc: Avoid an unnecessary test and branch in
 longjmp()

Doing the test at exit of the function avoids an unnecessary
test and branch inside longjmp().

Semantics are unchanged.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/misc.S | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 384357cb8bc00..c60b70da1e4be 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -85,10 +85,7 @@ _GLOBAL(setjmp)
 	blr
 
 _GLOBAL(longjmp)
-	PPC_LCMPI r4,0
-	bne	1f
-	li	r4,1
-1:	PPC_LL	r13,4*SZL(r3)
+	PPC_LL	r13,4*SZL(r3)
 	PPC_LL	r14,5*SZL(r3)
 	PPC_LL	r15,6*SZL(r3)
 	PPC_LL	r16,7*SZL(r3)
@@ -113,7 +110,9 @@ _GLOBAL(longjmp)
 	PPC_LL	r1,SZL(r3)
 	PPC_LL	r2,2*SZL(r3)
 	mtlr	r0
-	mr	r3,r4
+	mr.	r3, r4
+	bnelr
+	li	r3, 1
 	blr
 
 _GLOBAL(current_stack_pointer)

From a1f3ae3fe8a1883c339f1bc89d1c941b3809e084 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 17 Apr 2018 19:08:18 +0200
Subject: [PATCH 079/251] powerpc/32: Use stmw/lmw for registers save/restore
 in asm

arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs
in order to use multiple word instructions in functions entry/exit.

The patch does the same for the asm parts, for consistency.

On processors like the 8xx on which insn fetching is pretty slow,
this speeds up registers save/restore.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: PPC32 is BE only, so drop the endian checks]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc_asm.h  |  6 ++----
 arch/powerpc/kernel/misc.S          | 10 ++++++++++
 arch/powerpc/kernel/ppc_save_regs.S |  4 ++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 13f7f4c0e1eae..75ece56dcd625 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -80,10 +80,8 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #else
 #define SAVE_GPR(n, base)	stw	n,GPR0+4*(n)(base)
 #define REST_GPR(n, base)	lwz	n,GPR0+4*(n)(base)
-#define SAVE_NVGPRS(base)	SAVE_GPR(13, base); SAVE_8GPRS(14, base); \
-				SAVE_10GPRS(22, base)
-#define REST_NVGPRS(base)	REST_GPR(13, base); REST_8GPRS(14, base); \
-				REST_10GPRS(22, base)
+#define SAVE_NVGPRS(base)	stmw	13, GPR0+4*13(base)
+#define REST_NVGPRS(base)	lmw	13, GPR0+4*13(base)
 #endif
 
 #define SAVE_2GPRS(n, base)	SAVE_GPR(n, base); SAVE_GPR(n+1, base)
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index c60b70da1e4be..2f18fc1368d0a 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -60,6 +60,10 @@ _GLOBAL(setjmp)
 	PPC_STL	r0,0(r3)
 	PPC_STL	r1,SZL(r3)
 	PPC_STL	r2,2*SZL(r3)
+#ifdef CONFIG_PPC32
+	mfcr	r12
+	stmw	r12, 3*SZL(r3)
+#else
 	mfcr	r0
 	PPC_STL	r0,3*SZL(r3)
 	PPC_STL	r13,4*SZL(r3)
@@ -81,10 +85,15 @@ _GLOBAL(setjmp)
 	PPC_STL	r29,20*SZL(r3)
 	PPC_STL	r30,21*SZL(r3)
 	PPC_STL	r31,22*SZL(r3)
+#endif
 	li	r3,0
 	blr
 
 _GLOBAL(longjmp)
+#ifdef CONFIG_PPC32
+	lmw	r12, 3*SZL(r3)
+	mtcrf	0x38, r12
+#else
 	PPC_LL	r13,4*SZL(r3)
 	PPC_LL	r14,5*SZL(r3)
 	PPC_LL	r15,6*SZL(r3)
@@ -106,6 +115,7 @@ _GLOBAL(longjmp)
 	PPC_LL	r31,22*SZL(r3)
 	PPC_LL	r0,3*SZL(r3)
 	mtcrf	0x38,r0
+#endif
 	PPC_LL	r0,0(r3)
 	PPC_LL	r1,SZL(r3)
 	PPC_LL	r2,2*SZL(r3)
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 1b1787d528965..8afbe213d729b 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -25,6 +25,9 @@
  */
 _GLOBAL(ppc_save_regs)
 	PPC_STL	r0,0*SZL(r3)
+#ifdef CONFIG_PPC32
+	stmw	r2, 2*SZL(r3)
+#else
 	PPC_STL	r2,2*SZL(r3)
 	PPC_STL	r3,3*SZL(r3)
 	PPC_STL	r4,4*SZL(r3)
@@ -55,6 +58,7 @@ _GLOBAL(ppc_save_regs)
 	PPC_STL	r29,29*SZL(r3)
 	PPC_STL	r30,30*SZL(r3)
 	PPC_STL	r31,31*SZL(r3)
+#endif
 	/* go up one stack frame for SP */
 	PPC_LL	r4,0(r1)
 	PPC_STL	r4,1*SZL(r3)

From c0beffc4f4c658fde86d52c837e784326e9cc875 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 15 May 2018 01:59:46 +1000
Subject: [PATCH 080/251] powerpc/powernv: Fix opal_event_shutdown() called
 with interrupts disabled

A kernel crash in process context that calls emergency_restart from
panic will end up calling opal_event_shutdown with interrupts disabled
but not in interrupt. This causes a sleeping function to be called
which gives the following warning with sysrq+c:

    Rebooting in 10 seconds..
    BUG: sleeping function called from invalid context at kernel/locking/mutex.c:238
    in_atomic(): 0, irqs_disabled(): 1, pid: 7669, name: bash
    CPU: 20 PID: 7669 Comm: bash Tainted: G      D W         4.17.0-rc5+ #3
    Call Trace:
    dump_stack+0xb0/0xf4 (unreliable)
    ___might_sleep+0x174/0x1a0
    mutex_lock+0x38/0xb0
    __free_irq+0x68/0x460
    free_irq+0x70/0xc0
    opal_event_shutdown+0xb4/0xf0
    opal_shutdown+0x24/0xa0
    pnv_shutdown+0x28/0x40
    machine_shutdown+0x44/0x60
    machine_restart+0x28/0x80
    emergency_restart+0x30/0x50
    panic+0x2a0/0x328
    oops_end+0x1ec/0x1f0
    bad_page_fault+0xe8/0x154
    handle_page_fault+0x34/0x38
    --- interrupt: 300 at sysrq_handle_crash+0x44/0x60
    LR = __handle_sysrq+0xfc/0x260
    flag_spec.62335+0x12b844/0x1e8db4 (unreliable)
    __handle_sysrq+0xfc/0x260
    write_sysrq_trigger+0xa8/0xb0
    proc_reg_write+0xac/0x110
    __vfs_write+0x6c/0x240
    vfs_write+0xd0/0x240
    ksys_write+0x6c/0x110

Fixes: 9f0fd0499d30 ("powerpc/powernv: Add a virtual irqchip for opal events")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-irqchip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 9d1b8c0aaf93b..05ffe05f0fdc7 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -177,7 +177,7 @@ void opal_event_shutdown(void)
 		if (!opal_irqs[i])
 			continue;
 
-		if (in_interrupt())
+		if (in_interrupt() || irqs_disabled())
 			disable_irq_nosync(opal_irqs[i]);
 		else
 			free_irq(opal_irqs[i], NULL);

From 20acf7fc9409e48cfbfb38262aa53466dfbc22bb Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Fri, 18 May 2018 11:18:33 +1000
Subject: [PATCH 081/251] powerpc/lib: Fix "integer constant is too large"
 build failure

My powerpc-linux-gnu-gcc v4.4.5 compiler can't build a 32-bit kernel
any more:

arch/powerpc/lib/sstep.c: In function 'do_popcnt':
arch/powerpc/lib/sstep.c:1068: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1069: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1069: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1070: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1079: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c: In function 'do_prty':
arch/powerpc/lib/sstep.c:1117: error: integer constant is too large for 'long' type

This file gets compiled with -std=gnu89 which means a constant can be
given the type 'long' even if it won't fit. Fix the errors with a 'ULL'
suffix on the relevant constants.

Fixes: 2c979c489fee ("powerpc/lib/sstep: Add prty instruction emulation")
Fixes: dcbd19b48d31 ("powerpc/lib/sstep: Add popcnt instruction emulation")
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 34d68f1b1b405..49427a3ee1045 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1065,9 +1065,10 @@ static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
 {
 	unsigned long long out = v1;
 
-	out -= (out >> 1) & 0x5555555555555555;
-	out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
-	out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f;
+	out -= (out >> 1) & 0x5555555555555555ULL;
+	out = (0x3333333333333333ULL & out) +
+	      (0x3333333333333333ULL & (out >> 2));
+	out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
 
 	if (size == 8) {	/* popcntb */
 		op->val = out;
@@ -1076,7 +1077,7 @@ static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
 	out += out >> 8;
 	out += out >> 16;
 	if (size == 32) {	/* popcntw */
-		op->val = out & 0x0000003f0000003f;
+		op->val = out & 0x0000003f0000003fULL;
 		return;
 	}
 
@@ -1114,7 +1115,7 @@ static nokprobe_inline void do_prty(const struct pt_regs *regs,
 
 	res ^= res >> 16;
 	if (size == 32) {		/* prtyw */
-		op->val = res & 0x0000000100000001;
+		op->val = res & 0x0000000100000001ULL;
 		return;
 	}
 

From ba01b058a52abcb0539d94ae976ef1c357e06730 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 18 May 2018 10:31:17 +0100
Subject: [PATCH 082/251] powerpc/rtas: Fix spelling mistake "Discharching" ->
 "Discharging"

Trivial fix to spelling mistake in battery_charging array.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/rtas-proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index fb070d8cad07d..11ef978e95dbf 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -528,7 +528,7 @@ static void ppc_rtas_process_sensor(struct seq_file *m,
 		"EPOW power off" };
 	const char * battery_cyclestate[]  = { "None", "In progress", 
 						"Requested" };
-	const char * battery_charging[]    = { "Charging", "Discharching", 
+	const char * battery_charging[]    = { "Charging", "Discharging",
 						"No current flow" };
 	const char * ibm_drconnector[]     = { "Empty", "Present", "Unusable", 
 						"Exchange" };

From d2032678e57fc508d7878307badde8f89b632ba3 Mon Sep 17 00:00:00 2001
From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Date: Wed, 16 May 2018 12:05:18 +0530
Subject: [PATCH 083/251] powerpc/perf: Fix memory allocation for core-imc
 based on num_possible_cpus()

Currently memory is allocated for core-imc based on cpu_present_mask,
which has bit 'cpu' set iff cpu is populated. We use (cpu number / threads
per core) as the array index to access the memory.

Under some circumstances firmware marks a CPU as GUARDed CPU and boot the
system, until cleared of errors, these CPU's are unavailable for all
subsequent boots. GUARDed CPUs are possible but not present from linux
view, so it blows a hole when we assume the max length of our allocation
is driven by our max present cpus, where as one of the cpus might be online
and be beyond the max present cpus, due to the hole.
So (cpu number / threads per core) value bounds the array index and leads
to memory overflow.

Call trace observed during a guard test:

Faulting instruction address: 0xc000000000149f1c
cpu 0x69: Vector: 380 (Data Access Out of Range) at [c000003fea303420]
    pc:c000000000149f1c: prefetch_freepointer+0x14/0x30
    lr:c00000000014e0f8: __kmalloc+0x1a8/0x1ac
    sp:c000003fea3036a0
   msr:9000000000009033
   dar:c9c54b2c91dbf6b7
  current = 0xc000003fea2c0000
  paca    = 0xc00000000fddd880	 softe: 3	 irq_happened: 0x01
    pid   = 1, comm = swapper/104
Linux version 4.16.7-openpower1 (smc@smc-desktop) (gcc version 6.4.0
(Buildroot 2018.02.1-00006-ga8d1126)) #2 SMP Fri May 4 16:44:54 PDT 2018
enter ? for help
call trace:
	 __kmalloc+0x1a8/0x1ac
	 (unreliable)
	 init_imc_pmu+0x7f4/0xbf0
	 opal_imc_counters_probe+0x3fc/0x43c
	 platform_drv_probe+0x48/0x80
	 driver_probe_device+0x22c/0x308
	 __driver_attach+0xa0/0xd8
	 bus_for_each_dev+0x88/0xb4
	 driver_attach+0x2c/0x40
	 bus_add_driver+0x1e8/0x228
	 driver_register+0xd0/0x114
	 __platform_driver_register+0x50/0x64
	 opal_imc_driver_init+0x24/0x38
	 do_one_initcall+0x150/0x15c
	 kernel_init_freeable+0x250/0x254
	 kernel_init+0x1c/0x150
	 ret_from_kernel_thread+0x5c/0xc8

Allocating memory for core-imc based on cpu_possible_mask, which has
bit 'cpu' set iff cpu is populatable, will fix this issue.

Reported-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Tested-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
Fixes: 39a846db1d57 ("powerpc/perf: Add core IMC PMU support")
Cc: stable@vger.kernel.org # v4.14+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/imc-pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index d7532e7b9ab5c..75fb23c24ee8d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1146,7 +1146,7 @@ static int init_nest_pmu_ref(void)
 
 static void cleanup_all_core_imc_memory(void)
 {
-	int i, nr_cores = DIV_ROUND_UP(num_present_cpus(), threads_per_core);
+	int i, nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
 	struct imc_mem_info *ptr = core_imc_pmu->mem_info;
 	int size = core_imc_pmu->counter_mem_size;
 
@@ -1264,7 +1264,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 		if (!pmu_ptr->pmu.name)
 			return -ENOMEM;
 
-		nr_cores = DIV_ROUND_UP(num_present_cpus(), threads_per_core);
+		nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
 		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
 								GFP_KERNEL);
 

From 8056fe28d04607106e7d418bd9ee2e63562def50 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 14 May 2018 22:50:31 +1000
Subject: [PATCH 084/251] powerpc/io: Add __raw_writeq_be()
 __raw_rm_writeq_be()

Add byte-swapping versions of __raw_writeq() and __raw_rm_writeq().

This allows us to avoid sparse warnings caused by passing __be64 to
__raw_writeq(), which takes unsigned long:

  arch/powerpc/platforms/powernv/pci-ioda.c:1981:38:
  warning: incorrect type in argument 1 (different base types)
      expected unsigned long [unsigned] v
      got restricted __be64 [usertype] <noident>

It's also generally preferable to use a byte-swapping accessor rather
than doing it by hand in the code, which is more bug prone.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
 arch/powerpc/include/asm/io.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index af074923d5989..e0331e7545685 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -367,6 +367,11 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 	*(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v;
 }
 
+static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr)
+{
+	__raw_writeq((__force unsigned long)cpu_to_be64(v), addr);
+}
+
 /*
  * Real mode versions of the above. Those instructions are only supposed
  * to be used in hypervisor real mode as per the architecture spec.
@@ -395,6 +400,11 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
 		: : "r" (val), "r" (paddr) : "memory");
 }
 
+static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr)
+{
+	__raw_rm_writeq((__force u64)cpu_to_be64(val), paddr);
+}
+
 static inline u8 __raw_rm_readb(volatile void __iomem *paddr)
 {
 	u8 ret;

From 001ff2ee137de5fb7a099a36f7b29488114964fb Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 14 May 2018 22:50:32 +1000
Subject: [PATCH 085/251] powerpc/powernv: Use __raw_[rm_]writeq_be() in
 pci-ioda.c

This allows us to squash some sparse warnings and also avoids having
to do explicity endian conversions in the code.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9731098005726..1b02f86dfec10 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1976,9 +1976,10 @@ static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
         mb(); /* Ensure above stores are visible */
         while (start <= end) {
 		if (rm)
-			__raw_rm_writeq(cpu_to_be64(start), invalidate);
+			__raw_rm_writeq_be(start, invalidate);
 		else
-			__raw_writeq(cpu_to_be64(start), invalidate);
+			__raw_writeq_be(start, invalidate);
+
                 start += inc;
         }
 
@@ -2055,9 +2056,9 @@ static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
 
 	mb(); /* Ensure previous TCE table stores are visible */
 	if (rm)
-		__raw_rm_writeq(cpu_to_be64(val), invalidate);
+		__raw_rm_writeq_be(val, invalidate);
 	else
-		__raw_writeq(cpu_to_be64(val), invalidate);
+		__raw_writeq_be(val, invalidate);
 }
 
 static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
@@ -2067,7 +2068,7 @@ static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
 	unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
 
 	mb(); /* Ensure above stores are visible */
-	__raw_writeq(cpu_to_be64(val), invalidate);
+	__raw_writeq_be(val, invalidate);
 }
 
 static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
@@ -2090,9 +2091,9 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
 
 	while (start <= end) {
 		if (rm)
-			__raw_rm_writeq(cpu_to_be64(start), invalidate);
+			__raw_rm_writeq_be(start, invalidate);
 		else
-			__raw_writeq(cpu_to_be64(start), invalidate);
+			__raw_writeq_be(start, invalidate);
 		start += inc;
 	}
 }

From c786cf767b3c327cc095c36617059c932c2f43bd Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 14 May 2018 22:50:33 +1000
Subject: [PATCH 086/251] powerpc/powernv: Use __raw_[rm_]writeq_be() in
 npu-dma.c

This allows us to squash some sparse warnings and also avoids having
to do explicity endian conversions in the code.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 525e966dce341..8cdf91f5d3a4e 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -459,10 +459,9 @@ static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
 	struct npu *npu = mmio_atsd_reg->npu;
 	int reg = mmio_atsd_reg->reg;
 
-	__raw_writeq(cpu_to_be64(va),
-		npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
+	__raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
 	eieio();
-	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
+	__raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
 }
 
 static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],

From cd6ef7eebf171bfcba7dc2df719c2a4958775040 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 17 May 2018 15:37:14 +1000
Subject: [PATCH 087/251] powerpc/ptrace: Fix enforcement of DAWR constraints

Back when we first introduced the DAWR, in commit 4ae7ebe9522a
("powerpc: Change hardware breakpoint to allow longer ranges"), we
screwed up the constraint making it a 1024 byte boundary rather than a
512. This makes the check overly permissive. Fortunately GDB is the
only real user and it always did they right thing, so we never
noticed.

This fixes the constraint to 512 bytes.

Fixes: 4ae7ebe9522a ("powerpc: Change hardware breakpoint to allow longer ranges")
Cc: stable@vger.kernel.org # v3.9+
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/hw_breakpoint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 4c1012b80d3bb..80547dad37dae 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -178,8 +178,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	if (cpu_has_feature(CPU_FTR_DAWR)) {
 		length_max = 512 ; /* 64 doublewords */
 		/* DAWR region can't cross 512 boundary */
-		if ((bp->attr.bp_addr >> 10) != 
-		    ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
+		if ((bp->attr.bp_addr >> 9) !=
+		    ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 9))
 			return -EINVAL;
 	}
 	if (info->len >

From 4f7c06e26ec9cf7fe9f0c54dc90079b6a4f4b2c3 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 17 May 2018 15:37:15 +1000
Subject: [PATCH 088/251] powerpc/ptrace: Fix setting 512B aligned breakpoints
 with PTRACE_SET_DEBUGREG

In commit e2a800beaca1 ("powerpc/hw_brk: Fix off by one error when
validating DAWR region end") we fixed setting the DAWR end point to
its max value via PPC_PTRACE_SETHWDEBUG. Unfortunately we broke
PTRACE_SET_DEBUGREG when setting a 512 byte aligned breakpoint.

PTRACE_SET_DEBUGREG currently sets the length of the breakpoint to
zero (memset() in hw_breakpoint_init()). This worked with
arch_validate_hwbkpt_settings() before the above patch was applied but
is now broken if the breakpoint is 512byte aligned.

This sets the length of the breakpoint to 8 bytes when using
PTRACE_SET_DEBUGREG.

Fixes: e2a800beaca1 ("powerpc/hw_brk: Fix off by one error when validating DAWR region end")
Cc: stable@vger.kernel.org # v3.11+
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/ptrace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d23cf632edf06..0f63dd5972e9a 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2443,6 +2443,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	/* Create a new breakpoint request if one doesn't exist already */
 	hw_breakpoint_init(&attr);
 	attr.bp_addr = hw_brk.address;
+	attr.bp_len = 8;
 	arch_bp_generic_fields(hw_brk.type,
 			       &attr.bp_type);
 

From e4f2419fac381c5a7659834169dbe79659da2d0d Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Thu, 3 Aug 2017 14:59:34 +0200
Subject: [PATCH 089/251] powerpc/fsl/dts: fix the i2c-mux compatible for
 t104xqds

The sanctioned compatible is "nxp,pca9547".

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/dts/fsl/t104xqds.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
index 2fd4cbe7098f8..6154797322524 100644
--- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
@@ -269,7 +269,7 @@
 
 		i2c@118000 {
 			pca9547@77 {
-				compatible = "philips,pca9547";
+				compatible = "nxp,pca9547";
 				reg = <0x77>;
 			};
 			rtc@68 {

From 00c946a06ec8414ad22f0e8dcd17187bdd127a72 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 6 Oct 2017 10:48:57 +1100
Subject: [PATCH 090/251] selftests/powerpc: Remove redundant cp_abort test

Paste on POWER9 only works on accelerators and no longer on real
memory. Hence this test is broken so remove it.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/Makefile      |   1 -
 .../powerpc/context_switch/.gitignore         |   1 -
 .../selftests/powerpc/context_switch/Makefile |   5 -
 .../powerpc/context_switch/cp_abort.c         | 110 ------------------
 4 files changed, 117 deletions(-)
 delete mode 100644 tools/testing/selftests/powerpc/context_switch/.gitignore
 delete mode 100644 tools/testing/selftests/powerpc/context_switch/Makefile
 delete mode 100644 tools/testing/selftests/powerpc/context_switch/cp_abort.c

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index f6b1338730db5..201b598558b9c 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,7 +17,6 @@ SUB_DIRS = alignment		\
 	   benchmarks		\
 	   cache_shape		\
 	   copyloops		\
-	   context_switch	\
 	   dscr			\
 	   mm			\
 	   pmu			\
diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore
deleted file mode 100644
index c1431af7b51c5..0000000000000
--- a/tools/testing/selftests/powerpc/context_switch/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-cp_abort
diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile
deleted file mode 100644
index e9351bb4285d0..0000000000000
--- a/tools/testing/selftests/powerpc/context_switch/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-TEST_GEN_PROGS := cp_abort
-
-include ../../lib.mk
-
-$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
deleted file mode 100644
index 5a5b55afda0e5..0000000000000
--- a/tools/testing/selftests/powerpc/context_switch/cp_abort.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Adapted from Anton Blanchard's context switch microbenchmark.
- *
- * Copyright 2009, Anton Blanchard, IBM Corporation.
- * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This program tests the copy paste abort functionality of a P9
- * (or later) by setting up two processes on the same CPU, one
- * which executes the copy instruction and the other which
- * executes paste.
- *
- * The paste instruction should never succeed, as the cp_abort
- * instruction is called by the kernel during a context switch.
- *
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include "utils.h"
-#include <sched.h>
-
-#define READ_FD 0
-#define WRITE_FD 1
-
-#define NUM_LOOPS 1000
-
-/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
-#define PASTE(RA, RB, L, RC) \
-	.long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
-
-int paste(void *i)
-{
-	int cr;
-
-	asm volatile(str(PASTE(0, %1, 1, 1))";"
-			"mfcr %0;"
-			: "=r" (cr)
-			: "b" (i)
-			: "memory"
-		    );
-	return cr;
-}
-
-/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
-#define COPY(RA, RB, L) \
-	.long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
-
-void copy(void *i)
-{
-	asm volatile(str(COPY(0, %0, 1))";"
-			:
-			: "b" (i)
-			: "memory"
-		    );
-}
-
-int test_cp_abort(void)
-{
-	/* 128 bytes for a full cache line */
-	char buf[128] __cacheline_aligned;
-	cpu_set_t cpuset;
-	int fd1[2], fd2[2], pid;
-	char c;
-
-	/* only run this test on a P9 or later */
-	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
-
-	/*
-	 * Run both processes on the same CPU, so that copy is more likely
-	 * to leak into a paste.
-	 */
-	CPU_ZERO(&cpuset);
-	CPU_SET(pick_online_cpu(), &cpuset);
-	FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset));
-
-	FAIL_IF(pipe(fd1) || pipe(fd2));
-
-	pid = fork();
-	FAIL_IF(pid < 0);
-
-	if (!pid) {
-		for (int i = 0; i < NUM_LOOPS; i++) {
-			FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1);
-			FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1);
-			/* A paste succeeds if CR0 EQ bit is set */
-			FAIL_IF(paste(buf) & 0x20000000);
-		}
-	} else {
-		for (int i = 0; i < NUM_LOOPS; i++) {
-			FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1);
-			copy(buf);
-			FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1));
-		}
-	}
-	return 0;
-
-}
-
-int main(int argc, char *argv[])
-{
-	return test_harness(test_cp_abort, "cp_abort");
-}

From 5cdcb01e0af5a709c9bebe0e0450dc0c2f11a4d5 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Mon, 7 May 2018 15:55:36 +0530
Subject: [PATCH 091/251] powernv: opal-sensor: Add support to read 64bit
 sensor values

This patch adds support to read 64-bit sensor values. This method is
used to read energy sensors and counters which are of type u64.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/opal-api.h           |  1 +
 arch/powerpc/include/asm/opal.h               |  2 +
 arch/powerpc/platforms/powernv/opal-sensor.c  | 53 +++++++++++++++++++
 .../powerpc/platforms/powernv/opal-wrappers.S |  1 +
 4 files changed, 57 insertions(+)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index d886a5b7ff210..f34d173a2ebfe 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,6 +204,7 @@
 #define OPAL_NPU_SPA_SETUP			159
 #define OPAL_NPU_SPA_CLEAR_CACHE		160
 #define OPAL_NPU_TL_SET				161
+#define OPAL_SENSOR_READ_U64			162
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
 #define OPAL_LAST				165
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 03e1a920491ed..3960def0e39c4 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -201,6 +201,7 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
 int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
 		uint64_t length);
 int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
+int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
 int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
@@ -323,6 +324,7 @@ extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
 extern int opal_async_wait_response_interruptible(uint64_t token,
 		struct opal_msg *msg);
 extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
+extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data);
 
 struct rtc_time;
 extern unsigned long opal_get_boot_time(void);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index 0a7074bb91dca..35a5f4b9aeb59 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -72,6 +72,59 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
 }
 EXPORT_SYMBOL_GPL(opal_get_sensor_data);
 
+int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data)
+{
+	int ret, token;
+	struct opal_msg msg;
+	__be64 data;
+
+	if (!opal_check_token(OPAL_SENSOR_READ_U64)) {
+		u32 sdata;
+
+		ret = opal_get_sensor_data(sensor_hndl, &sdata);
+		if (!ret)
+			*sensor_data = sdata;
+		return ret;
+	}
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0)
+		return token;
+
+	ret = opal_sensor_read_u64(sensor_hndl, token, &data);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, ret);
+			goto out_token;
+		}
+
+		ret = opal_error_code(opal_get_async_rc(msg));
+		*sensor_data = be64_to_cpu(data);
+		break;
+
+	case OPAL_SUCCESS:
+		ret = 0;
+		*sensor_data = be64_to_cpu(data);
+		break;
+
+	case OPAL_WRONG_STATE:
+		ret = -EIO;
+		break;
+
+	default:
+		ret = opal_error_code(ret);
+		break;
+	}
+
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(opal_get_sensor_data_u64);
+
 int __init opal_sensor_init(void)
 {
 	struct platform_device *pdev;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3da30c2f26b45..8482df255969c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -325,3 +325,4 @@ OPAL_CALL(opal_npu_spa_clear_cache,		OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,			OPAL_NPU_TL_SET);
 OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,		OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
 OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,		OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_sensor_read_u64,			OPAL_SENSOR_READ_U64);

From 3c8c049aa7bdffaab2e53401fd5270a3acc32631 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Mon, 7 May 2018 15:55:37 +0530
Subject: [PATCH 092/251] hwmon: (ibmpowernv): Add support to read 64 bit
 sensors

The firmware has supported for reading sensor values of size u32.
This patch adds support to use newer firmware functions which allows
to read the sensors of size u64.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/hwmon/ibmpowernv.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index 5ccdd0b526506..74d9b5ae55ec8 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -101,9 +101,10 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
 	struct sensor_data *sdata = container_of(devattr, struct sensor_data,
 						 dev_attr);
 	ssize_t ret;
-	u32 x;
+	u64 x;
+
+	ret =  opal_get_sensor_data_u64(sdata->id, &x);
 
-	ret = opal_get_sensor_data(sdata->id, &x);
 	if (ret)
 		return ret;
 
@@ -114,7 +115,7 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
 	else if (sdata->type == POWER_INPUT)
 		x *= 1000000;
 
-	return sprintf(buf, "%u\n", x);
+	return sprintf(buf, "%llu\n", x);
 }
 
 static ssize_t show_label(struct device *dev, struct device_attribute *devattr,

From 43d2974b66d916a6df16e536da542e3a65aab7b9 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Mon, 7 May 2018 15:55:38 +0530
Subject: [PATCH 093/251] hwmon: (ibmpowernv) Add energy sensors

This patch exports the accumulated power numbers of each power
sensor maintained by OCC.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/hwmon/ibmpowernv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index 74d9b5ae55ec8..0298745d46e40 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -51,6 +51,7 @@ enum sensors {
 	POWER_SUPPLY,
 	POWER_INPUT,
 	CURRENT,
+	ENERGY,
 	MAX_SENSOR_TYPE,
 };
 
@@ -78,6 +79,7 @@ static struct sensor_group {
 	{ "in"    },
 	{ "power" },
 	{ "curr"  },
+	{ "energy" },
 };
 
 struct sensor_data {

From d1c7211281c5e1799f00b2228157530e0f7a671c Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:44 +0800
Subject: [PATCH 094/251] powerpc: Export msr_check_and_set() to modules

PR KVM will need to reuse msr_check_and_set().
This patch exports this API for reuse.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1237f13fed518..25db000fa5b33 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -154,6 +154,7 @@ unsigned long msr_check_and_set(unsigned long bits)
 
 	return newmsr;
 }
+EXPORT_SYMBOL_GPL(msr_check_and_set);
 
 void __msr_check_and_clear(unsigned long bits)
 {

From ab3759b5734544dd1430527c3d89730990cfa4bb Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:45 +0800
Subject: [PATCH 095/251] powerpc/reg: Add TEXASR related macros

This patches add some macros for CR0/TEXASR bits so that PR KVM TM
logic (tbegin./treclaim./tabort.) can make use of them later.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/reg.h              | 32 +++++++++++++++++----
 arch/powerpc/platforms/powernv/copy-paste.h |  3 +-
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index cb0f272ce1235..75c5b2cd9d667 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -146,6 +146,12 @@
 #define MSR_64BIT	0
 #endif
 
+/* Condition Register related */
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+#define CR0_TBEGIN_FAILURE	(0x2 << 28) /* 0b0010 */
+
+
 /* Power Management - Processor Stop Status and Control Register Fields */
 #define PSSCR_RL_MASK		0x0000000F /* Requested Level */
 #define PSSCR_MTL_MASK		0x000000F0 /* Maximum Transition Level */
@@ -239,13 +245,27 @@
 #define SPRN_TFIAR	0x81	/* Transaction Failure Inst Addr   */
 #define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
 #define SPRN_TEXASRU	0x83	/* ''	   ''	   ''	 Upper 32  */
-#define   TEXASR_ABORT	__MASK(63-31) /* terminated by tabort or treclaim */
-#define   TEXASR_SUSP	__MASK(63-32) /* tx failed in suspended state */
-#define   TEXASR_HV	__MASK(63-34) /* MSR[HV] when failure occurred */
-#define   TEXASR_PR	__MASK(63-35) /* MSR[PR] when failure occurred */
-#define   TEXASR_FS	__MASK(63-36) /* TEXASR Failure Summary */
-#define   TEXASR_EXACT	__MASK(63-37) /* TFIAR value is exact */
+
+#define TEXASR_FC_LG	(63 - 7)	/* Failure Code */
+#define TEXASR_AB_LG	(63 - 31)	/* Abort */
+#define TEXASR_SU_LG	(63 - 32)	/* Suspend */
+#define TEXASR_HV_LG	(63 - 34)	/* Hypervisor state*/
+#define TEXASR_PR_LG	(63 - 35)	/* Privilege level */
+#define TEXASR_FS_LG	(63 - 36)	/* failure summary */
+#define TEXASR_EX_LG	(63 - 37)	/* TFIAR exact bit */
+#define TEXASR_ROT_LG	(63 - 38)	/* ROT bit */
+
+#define   TEXASR_ABORT	__MASK(TEXASR_AB_LG) /* terminated by tabort or treclaim */
+#define   TEXASR_SUSP	__MASK(TEXASR_SU_LG) /* tx failed in suspended state */
+#define   TEXASR_HV	__MASK(TEXASR_HV_LG) /* MSR[HV] when failure occurred */
+#define   TEXASR_PR	__MASK(TEXASR_PR_LG) /* MSR[PR] when failure occurred */
+#define   TEXASR_FS	__MASK(TEXASR_FS_LG) /* TEXASR Failure Summary */
+#define   TEXASR_EXACT	__MASK(TEXASR_EX_LG) /* TFIAR value is exact */
+#define   TEXASR_ROT	__MASK(TEXASR_ROT_LG)
+#define   TEXASR_FC	(ASM_CONST(0xFF) << TEXASR_FC_LG)
+
 #define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
+
 #define SPRN_TIDR	144	/* Thread ID register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
index c9a5036234310..3fa62de96d9ca 100644
--- a/arch/powerpc/platforms/powernv/copy-paste.h
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -7,9 +7,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <asm/ppc-opcode.h>
+#include <asm/reg.h>
 
-#define CR0_SHIFT	28
-#define CR0_MASK	0xF
 /*
  * Copy/paste instructions:
  *

From eacbb218fbbab5923775059f7232a9622dc47b2a Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:46 +0800
Subject: [PATCH 096/251] powerpc: Export tm_enable()/tm_disable/tm_abort()
 APIs

This patch exports tm_enable()/tm_disable/tm_abort() APIs, which
will be used for PR KVM transactional memory logic.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/asm-prototypes.h |  3 +++
 arch/powerpc/include/asm/tm.h             |  2 --
 arch/powerpc/kernel/tm.S                  | 12 ++++++++++++
 arch/powerpc/mm/hash_utils_64.c           |  1 +
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d9713ad62e3ca..dfdcb2374c284 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,4 +141,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 void pnv_power9_force_smt4_catch(void);
 void pnv_power9_force_smt4_release(void);
 
+void tm_enable(void);
+void tm_disable(void);
+void tm_abort(uint8_t cause);
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index b1658c97047cb..e94f6db5e367b 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -10,12 +10,10 @@
 
 #ifndef __ASSEMBLY__
 
-extern void tm_enable(void);
 extern void tm_reclaim(struct thread_struct *thread,
 		       uint8_t cause);
 extern void tm_reclaim_current(uint8_t cause);
 extern void tm_recheckpoint(struct thread_struct *thread);
-extern void tm_abort(uint8_t cause);
 extern void tm_save_sprs(struct thread_struct *thread);
 extern void tm_restore_sprs(struct thread_struct *thread);
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index b92ac8e711db0..ff12f47a96b61 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -12,6 +12,7 @@
 #include <asm/ptrace.h>
 #include <asm/reg.h>
 #include <asm/bug.h>
+#include <asm/export.h>
 
 #ifdef CONFIG_VSX
 /* See fpu.S, this is borrowed from there */
@@ -55,6 +56,16 @@ _GLOBAL(tm_enable)
 	or	r4, r4, r3
 	mtmsrd	r4
 1:	blr
+EXPORT_SYMBOL_GPL(tm_enable);
+
+_GLOBAL(tm_disable)
+	mfmsr	r4
+	li	r3, MSR_TM >> 32
+	sldi	r3, r3, 32
+	andc	r4, r4, r3
+	mtmsrd	r4
+	blr
+EXPORT_SYMBOL_GPL(tm_disable);
 
 _GLOBAL(tm_save_sprs)
 	mfspr	r0, SPRN_TFHAR
@@ -78,6 +89,7 @@ _GLOBAL(tm_restore_sprs)
 _GLOBAL(tm_abort)
 	TABORT(R3)
 	blr
+EXPORT_SYMBOL_GPL(tm_abort);
 
 /* void tm_reclaim(struct thread_struct *thread,
  *		   uint8_t cause)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0bd3790d35df4..1bd8b4c1aab89 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -64,6 +64,7 @@
 #include <asm/trace.h>
 #include <asm/ps3.h>
 #include <asm/pte-walk.h>
+#include <asm/asm-prototypes.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)

From 8a0b1120cb25ccd4480ba4fe3650bc22b0dfadf4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 23 May 2018 09:04:04 +0200
Subject: [PATCH 097/251] powerpc/mm: Use instruction symbolic names in
 store_updates_sp()

Use symbolic names defined in asm/ppc-opcode.h
instead of hardcoded values.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc-opcode.h |  1 +
 arch/powerpc/mm/fault.c               | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 18883b8a6dace..4436887bc415b 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -162,6 +162,7 @@
 /* VMX Vector Store Instructions */
 #define OP_31_XOP_STVX          231
 
+#define OP_31   31
 #define OP_LWZ  32
 #define OP_STFS 52
 #define OP_STFSU 53
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c01d627e687ae..0c99f9b45e8f5 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -80,23 +80,23 @@ static bool store_updates_sp(struct pt_regs *regs)
 		return false;
 	/* check major opcode */
 	switch (inst >> 26) {
-	case 37:	/* stwu */
-	case 39:	/* stbu */
-	case 45:	/* sthu */
-	case 53:	/* stfsu */
-	case 55:	/* stfdu */
+	case OP_STWU:
+	case OP_STBU:
+	case OP_STHU:
+	case OP_STFSU:
+	case OP_STFDU:
 		return true;
-	case 62:	/* std or stdu */
+	case OP_STD:	/* std or stdu */
 		return (inst & 3) == 1;
-	case 31:
+	case OP_31:
 		/* check minor opcode */
 		switch ((inst >> 1) & 0x3ff) {
-		case 181:	/* stdux */
-		case 183:	/* stwux */
-		case 247:	/* stbux */
-		case 439:	/* sthux */
-		case 695:	/* stfsux */
-		case 759:	/* stfdux */
+		case OP_31_XOP_STDUX:
+		case OP_31_XOP_STWUX:
+		case OP_31_XOP_STBUX:
+		case OP_31_XOP_STHUX:
+		case OP_31_XOP_STFSUX:
+		case OP_31_XOP_STFDUX:
 			return true;
 		}
 	}

From 0e36b0d12501e278686634712975b785bae11641 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 23 May 2018 10:53:22 +0200
Subject: [PATCH 098/251] powerpc/mm: Only read faulting instruction when
 necessary in do_page_fault()

Commit a7a9dcd882a67 ("powerpc: Avoid taking a data miss on every
userspace instruction miss") has shown that limiting the read of
faulting instruction to likely cases improves performance.

This patch goes further into this direction by limiting the read
of the faulting instruction to the only cases where it is likely
needed.

On an MPC885, with the same benchmark app as in the commit referred
above, we see a reduction of about 3900 dTLB misses (approx 3%):

Before the patch:
 Performance counter stats for './fault 500' (10 runs):

         683033312      cpu-cycles                                                    ( +-  0.03% )
            134538      dTLB-load-misses                                              ( +-  0.03% )
             46099      iTLB-load-misses                                              ( +-  0.02% )
             19681      faults                                                        ( +-  0.02% )

       5.389747878 seconds time elapsed                                          ( +-  0.06% )

With the patch:

 Performance counter stats for './fault 500' (10 runs):

         682112862      cpu-cycles                                                    ( +-  0.03% )
            130619      dTLB-load-misses                                              ( +-  0.03% )
             46073      iTLB-load-misses                                              ( +-  0.05% )
             19681      faults                                                        ( +-  0.01% )

       5.381342641 seconds time elapsed                                          ( +-  0.07% )

The proper work of the huge stack expansion was tested with the
following app:

int main(int argc, char **argv)
{
	char buf[1024 * 1025];

	sprintf(buf, "Hello world !\n");
	printf(buf);

	exit(0);
}

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Add include of pagemap.h to fix build errors]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/fault.c | 50 ++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 0c99f9b45e8f5..3fafacd3f9074 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -22,6 +22,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/pagemap.h>
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
@@ -66,15 +67,11 @@ static inline bool notify_page_fault(struct pt_regs *regs)
 }
 
 /*
- * Check whether the instruction at regs->nip is a store using
+ * Check whether the instruction inst is a store using
  * an update addressing form which will update r1.
  */
-static bool store_updates_sp(struct pt_regs *regs)
+static bool store_updates_sp(unsigned int inst)
 {
-	unsigned int inst;
-
-	if (get_user(inst, (unsigned int __user *)regs->nip))
-		return false;
 	/* check for 1 in the rA field */
 	if (((inst >> 16) & 0x1f) != 1)
 		return false;
@@ -234,8 +231,8 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
 }
 
 static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
-				struct vm_area_struct *vma,
-				bool store_update_sp)
+				struct vm_area_struct *vma, unsigned int flags,
+				bool *must_retry)
 {
 	/*
 	 * N.B. The POWER/Open ABI allows programs to access up to
@@ -247,6 +244,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
 	 * expand to 1MB without further checks.
 	 */
 	if (address + 0x100000 < vma->vm_end) {
+		unsigned int __user *nip = (unsigned int __user *)regs->nip;
 		/* get user regs even if this fault is in kernel mode */
 		struct pt_regs *uregs = current->thread.regs;
 		if (uregs == NULL)
@@ -264,8 +262,22 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
 		 * between the last mapped region and the stack will
 		 * expand the stack rather than segfaulting.
 		 */
-		if (address + 2048 < uregs->gpr[1] && !store_update_sp)
-			return true;
+		if (address + 2048 >= uregs->gpr[1])
+			return false;
+
+		if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
+		    access_ok(VERIFY_READ, nip, sizeof(*nip))) {
+			unsigned int inst;
+			int res;
+
+			pagefault_disable();
+			res = __get_user_inatomic(inst, nip);
+			pagefault_enable();
+			if (!res)
+				return !store_updates_sp(inst);
+			*must_retry = true;
+		}
+		return true;
 	}
 	return false;
 }
@@ -403,7 +415,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	int is_user = user_mode(regs);
 	int is_write = page_fault_is_write(error_code);
 	int fault, major = 0;
-	bool store_update_sp = false;
+	bool must_retry = false;
 
 	if (notify_page_fault(regs))
 		return 0;
@@ -454,9 +466,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * can result in fault, which will cause a deadlock when called with
 	 * mmap_sem held
 	 */
-	if (is_write && is_user)
-		store_update_sp = store_updates_sp(regs);
-
 	if (is_user)
 		flags |= FAULT_FLAG_USER;
 	if (is_write)
@@ -503,8 +512,17 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 		return bad_area(regs, address);
 
 	/* The stack is being expanded, check if it's valid */
-	if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp)))
-		return bad_area(regs, address);
+	if (unlikely(bad_stack_expansion(regs, address, vma, flags,
+					 &must_retry))) {
+		if (!must_retry)
+			return bad_area(regs, address);
+
+		up_read(&mm->mmap_sem);
+		if (fault_in_pages_readable((const char __user *)regs->nip,
+					    sizeof(unsigned int)))
+			return bad_area_nosemaphore(regs, address);
+		goto retry;
+	}
 
 	/* Try to expand it */
 	if (unlikely(expand_stack(vma, address)))

From bd79010fb3a9aa160e1780e2496798e3ac6e7ba5 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 22 May 2018 16:13:59 +1000
Subject: [PATCH 099/251] selftests/powerpc: Add missing .gitignores

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/alignment/.gitignore | 1 +
 tools/testing/selftests/powerpc/tm/.gitignore        | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
index 1d980e3d70399..9d383073b7ad4 100644
--- a/tools/testing/selftests/powerpc/alignment/.gitignore
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -3,3 +3,4 @@ copy_first_unaligned
 paste_unaligned
 paste_last_unaligned
 copy_paste_unaligned_common
+alignment_handler
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index bb90d4b79524e..c3ee8393dae80 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -14,3 +14,4 @@ tm-signal-context-chk-vsx
 tm-vmx-unavail
 tm-unavailable
 tm-trap
+tm-sigreturn

From 9c2ddfe55c42bf4b9bc336a0650ab78f9222a159 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 22 May 2018 16:14:27 +1000
Subject: [PATCH 100/251] selftests/powerpc: Add ptrace hw breakpoint test

This test the ptrace hw breakpoints via PTRACE_SET_DEBUGREG and
PPC_PTRACE_SETHWDEBUG.  This test was use to find the bugs fixed by
these recent commits:

  4f7c06e26e powerpc/ptrace: Fix setting 512B aligned breakpoints with PTRACE_SET_DEBUGREG
  cd6ef7eebf powerpc/ptrace: Fix enforcement of DAWR constraints

Signed-off-by: Michael Neuling <mikey@neuling.org>
[mpe: Add SPDX tag, clang format it]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../selftests/powerpc/ptrace/.gitignore       |   1 +
 .../testing/selftests/powerpc/ptrace/Makefile |   2 +-
 .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 342 ++++++++++++++++++
 3 files changed, 344 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c

diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 349acfafc95b6..9dcc16ea8179c 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -8,3 +8,4 @@ ptrace-vsx
 ptrace-tm-vsx
 ptrace-tm-spd-vsx
 ptrace-tm-spr
+ptrace-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 4803052665043..0e2f4601d1a8e 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
-              ptrace-tm-spd-vsx ptrace-tm-spr
+              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
new file mode 100644
index 0000000000000..3066d310f32b6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Ptrace test for hw breakpoints
+ *
+ * Based on tools/testing/selftests/breakpoints/breakpoint_test.c
+ *
+ * This test forks and the parent then traces the child doing various
+ * types of ptrace enabled breakpoints
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ */
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+/* Breakpoint access modes */
+enum {
+	BP_X = 1,
+	BP_RW = 2,
+	BP_W = 4,
+};
+
+static pid_t child_pid;
+static struct ppc_debug_info dbginfo;
+
+static void get_dbginfo(void)
+{
+	int ret;
+
+	ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+	if (ret) {
+		perror("Can't get breakpoint info\n");
+		exit(-1);
+	}
+}
+
+static bool hwbreak_present(void)
+{
+	return (dbginfo.num_data_bps != 0);
+}
+
+static bool dawr_present(void)
+{
+	return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
+}
+
+static void set_breakpoint_addr(void *addr)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
+	if (ret) {
+		perror("Can't set breakpoint addr\n");
+		exit(-1);
+	}
+}
+
+static int set_hwbreakpoint_addr(void *addr, int range)
+{
+	int ret;
+
+	struct ppc_hw_breakpoint info;
+
+	info.version = 1;
+	info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+	info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+	if (range > 0)
+		info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+	info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+	info.addr = (__u64)addr;
+	info.addr2 = (__u64)addr + range;
+	info.condition_value = 0;
+
+	ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+	if (ret < 0) {
+		perror("Can't set breakpoint\n");
+		exit(-1);
+	}
+	return ret;
+}
+
+static int del_hwbreakpoint_addr(int watchpoint_handle)
+{
+	int ret;
+
+	ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
+	if (ret < 0) {
+		perror("Can't delete hw breakpoint\n");
+		exit(-1);
+	}
+	return ret;
+}
+
+#define DAWR_LENGTH_MAX 512
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long
+	dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
+	__attribute__((aligned(512)));
+static unsigned long long *dummy_var = dummy_array;
+
+static void write_var(int len)
+{
+	long long *plval;
+	char *pcval;
+	short *psval;
+	int *pival;
+
+	switch (len) {
+	case 1:
+		pcval = (char *)dummy_var;
+		*pcval = 0xff;
+		break;
+	case 2:
+		psval = (short *)dummy_var;
+		*psval = 0xffff;
+		break;
+	case 4:
+		pival = (int *)dummy_var;
+		*pival = 0xffffffff;
+		break;
+	case 8:
+		plval = (long long *)dummy_var;
+		*plval = 0xffffffffffffffffLL;
+		break;
+	}
+}
+
+static void read_var(int len)
+{
+	char cval __attribute__((unused));
+	short sval __attribute__((unused));
+	int ival __attribute__((unused));
+	long long lval __attribute__((unused));
+
+	switch (len) {
+	case 1:
+		cval = *(char *)dummy_var;
+		break;
+	case 2:
+		sval = *(short *)dummy_var;
+		break;
+	case 4:
+		ival = *(int *)dummy_var;
+		break;
+	case 8:
+		lval = *(long long *)dummy_var;
+		break;
+	}
+}
+
+/*
+ * Do the r/w accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+	int len, ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		perror("Can't be traced?\n");
+		return;
+	}
+
+	/* Wake up father so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		write_var(len);
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		read_var(len);
+
+	/* Test when breakpoint is unset */
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		write_var(len);
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		read_var(len);
+}
+
+static void check_success(const char *msg)
+{
+	const char *msg2;
+	int status;
+
+	/* Wait for the child to SIGTRAP */
+	wait(&status);
+
+	msg2 = "Failed";
+
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		msg2 = "Child process hit the breakpoint";
+	}
+
+	printf("%s Result: [%s]\n", msg, msg2);
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+			       struct ppc_debug_info *dbginfo, bool dawr)
+{
+	const char *mode_str;
+	unsigned long data = (unsigned long)(dummy_var);
+	int wh, range;
+
+	data &= ~0x7UL;
+
+	if (mode == BP_W) {
+		data |= (1UL << 1);
+		mode_str = "write";
+	} else {
+		data |= (1UL << 0);
+		data |= (1UL << 1);
+		mode_str = "read";
+	}
+
+	/* Set DABR_TRANSLATION bit */
+	data |= (1UL << 2);
+
+	/* use PTRACE_SET_DEBUGREG breakpoints */
+	set_breakpoint_addr((void *)data);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	set_breakpoint_addr(NULL);
+
+	data = (data & ~7); /* remove dabr control bits */
+
+	/* use PPC_PTRACE_SETHWDEBUG breakpoint */
+	if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+		return; /* not supported */
+	wh = set_hwbreakpoint_addr((void *)data, 0);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	del_hwbreakpoint_addr(wh);
+
+	/* try a wider range */
+	range = 8;
+	if (dawr)
+		range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
+	wh = set_hwbreakpoint_addr((void *)data, range);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	del_hwbreakpoint_addr(wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static int launch_tests(bool dawr)
+{
+	char buf[1024];
+	int len, i, status;
+
+	struct ppc_debug_info dbginfo;
+
+	i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+	if (i) {
+		perror("Can't set breakpoint info\n");
+		exit(-1);
+	}
+	if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+		printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
+
+	/* Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+
+	/* Read-Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+	/*
+	 * Now we have unregistered the breakpoint, access by child
+	 * should not cause SIGTRAP.
+	 */
+
+	wait(&status);
+
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		printf("FAIL: Child process hit the breakpoint, which is not expected\n");
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		return TEST_FAIL;
+	}
+
+	if (WIFEXITED(status))
+		printf("Child exited normally\n");
+
+	return TEST_PASS;
+}
+
+static int ptrace_hwbreak(void)
+{
+	pid_t pid;
+	int ret;
+	bool dawr;
+
+	pid = fork();
+	if (!pid) {
+		trigger_tests();
+		return 0;
+	}
+
+	wait(NULL);
+
+	child_pid = pid;
+
+	get_dbginfo();
+	SKIP_IF(!hwbreak_present());
+	dawr = dawr_present();
+
+	ret = launch_tests(dawr);
+
+	wait(NULL);
+
+	return ret;
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
+}

From e4ccb1dae6bdef228d729c076c38161ef6e7ca34 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 24 May 2018 11:02:06 +0000
Subject: [PATCH 101/251] powerpc/8xx: fix invalid register expression in
 head_8xx.S

New binutils generate the following warning

  AS      arch/powerpc/kernel/head_8xx.o
arch/powerpc/kernel/head_8xx.S: Assembler messages:
arch/powerpc/kernel/head_8xx.S:916: Warning: invalid register expression

This patch fixes it.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/head_8xx.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index d8670a37d70cc..6cab07e767327 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -913,7 +913,7 @@ start_here:
 	tovirt(r6,r6)
 	lis	r5, abatron_pteptrs@h
 	ori	r5, r5, abatron_pteptrs@l
-	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
+	stw	r5, 0xf0(0)	/* Must match your Abatron config file */
 	tophys(r5,r5)
 	stw	r6, 0(r5)
 

From 7daf59300999693b85233762b847f1b2313cccbd Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Wed, 23 May 2018 20:00:54 +0200
Subject: [PATCH 102/251] powerpc/xmon: Also setup debugger hooks when
 single-stepping

When single-stepping kernel code from xmon without a debug hook
enabled the kernel crashes. This can happen when kernel starts with
xmon on crash disabled but xmon is entered using sysrq.

Call force_enable_xmon when single-stepping in xmon to install the
xmon debug hooks.

Fixes: e1368d0c9edb ("powerpc/xmon: Setup debugger hooks when first break-point is set")
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Reviewed-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a0842f1ff72ca..252df9741d204 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -778,6 +778,16 @@ static int xmon_fault_handler(struct pt_regs *regs)
 	return 0;
 }
 
+/* Force enable xmon if not already enabled */
+static inline void force_enable_xmon(void)
+{
+	/* Enable xmon hooks if needed */
+	if (!xmon_on) {
+		printf("xmon: Enabling debugger hooks\n");
+		xmon_on = 1;
+	}
+}
+
 static struct bpt *at_breakpoint(unsigned long pc)
 {
 	int i;
@@ -1094,6 +1104,7 @@ static int do_step(struct pt_regs *regs)
 	unsigned int instr;
 	int stepped;
 
+	force_enable_xmon();
 	/* check we are in 64-bit kernel mode, translation enabled */
 	if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
 		if (mread(regs->nip, &instr, 4) == 4) {
@@ -1268,16 +1279,6 @@ static long check_bp_loc(unsigned long addr)
 	return 1;
 }
 
-/* Force enable xmon if not already enabled */
-static inline void force_enable_xmon(void)
-{
-	/* Enable xmon hooks if needed */
-	if (!xmon_on) {
-		printf("xmon: Enabling debugger hooks\n");
-		xmon_on = 1;
-	}
-}
-
 static char *breakpoint_help_string =
     "Breakpoint command usage:\n"
     "b                show breakpoints\n"

From 6671683db8540e5766f44a1089549c168730ae41 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 21 May 2018 21:06:19 +1000
Subject: [PATCH 103/251] powerpc/xmon: Specify the full format in DUMP() macro

In dump_one_paca() the DUMP macro unconditionally prepends '#' to the
printf format specifier. In most cases we're using either 'x' or 'lx'
etc. and that is OK. But for 'p' and other formats using '#' is
actually undefined, and once we enable printf() checking for
xmon_printf() we will get warnings from the compiler.

So just have each usage specify the full format, that way we can omit
'#' when it's inappropriate.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Mathieu Malaterre <malat@debian.org>
---
 arch/powerpc/xmon/xmon.c | 102 +++++++++++++++++++--------------------
 1 file changed, 51 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 252df9741d204..a3d597c8cfa22 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2350,27 +2350,27 @@ static void dump_one_paca(int cpu)
 	printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
 	printf(" %-*s = %s\n", 20, "online", cpu_online(cpu) ? "yes" : "no");
 
-#define DUMP(paca, name, format) \
-	printf(" %-*s = %#-*"format"\t(0x%lx)\n", 20, #name, 18, paca->name, \
+#define DUMP(paca, name, format)				\
+	printf(" %-*s = "format"\t(0x%lx)\n", 20, #name, 18, paca->name, \
 		offsetof(struct paca_struct, name));
 
-	DUMP(p, lock_token, "x");
-	DUMP(p, paca_index, "x");
-	DUMP(p, kernel_toc, "lx");
-	DUMP(p, kernelbase, "lx");
-	DUMP(p, kernel_msr, "lx");
-	DUMP(p, emergency_sp, "px");
+	DUMP(p, lock_token, "%#-*x");
+	DUMP(p, paca_index, "%#-*x");
+	DUMP(p, kernel_toc, "%#-*lx");
+	DUMP(p, kernelbase, "%#-*lx");
+	DUMP(p, kernel_msr, "%#-*lx");
+	DUMP(p, emergency_sp, "%-*px");
 #ifdef CONFIG_PPC_BOOK3S_64
-	DUMP(p, nmi_emergency_sp, "px");
-	DUMP(p, mc_emergency_sp, "px");
-	DUMP(p, in_nmi, "x");
-	DUMP(p, in_mce, "x");
-	DUMP(p, hmi_event_available, "x");
+	DUMP(p, nmi_emergency_sp, "%-*px");
+	DUMP(p, mc_emergency_sp, "%-*px");
+	DUMP(p, in_nmi, "%#-*x");
+	DUMP(p, in_mce, "%#-*x");
+	DUMP(p, hmi_event_available, "%#-*x");
 #endif
-	DUMP(p, data_offset, "lx");
-	DUMP(p, hw_cpu_id, "x");
-	DUMP(p, cpu_start, "x");
-	DUMP(p, kexec_state, "x");
+	DUMP(p, data_offset, "%#-*lx");
+	DUMP(p, hw_cpu_id, "%#-*x");
+	DUMP(p, cpu_start, "%#-*x");
+	DUMP(p, kexec_state, "%#-*x");
 #ifdef CONFIG_PPC_BOOK3S_64
 	for (i = 0; i < SLB_NUM_BOLTED; i++) {
 		u64 esid, vsid;
@@ -2386,54 +2386,54 @@ static void dump_one_paca(int cpu)
 				i, esid, vsid);
 		}
 	}
-	DUMP(p, vmalloc_sllp, "x");
-	DUMP(p, slb_cache_ptr, "x");
+	DUMP(p, vmalloc_sllp, "%#-*x");
+	DUMP(p, slb_cache_ptr, "%#-*x");
 	for (i = 0; i < SLB_CACHE_ENTRIES; i++)
 		printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]);
 
-	DUMP(p, rfi_flush_fallback_area, "px");
+	DUMP(p, rfi_flush_fallback_area, "%-*px");
 #endif
-	DUMP(p, dscr_default, "llx");
+	DUMP(p, dscr_default, "%#-*llx");
 #ifdef CONFIG_PPC_BOOK3E
-	DUMP(p, pgd, "px");
-	DUMP(p, kernel_pgd, "px");
-	DUMP(p, tcd_ptr, "px");
-	DUMP(p, mc_kstack, "px");
-	DUMP(p, crit_kstack, "px");
-	DUMP(p, dbg_kstack, "px");
+	DUMP(p, pgd, "%-*px");
+	DUMP(p, kernel_pgd, "%-*px");
+	DUMP(p, tcd_ptr, "%-*px");
+	DUMP(p, mc_kstack, "%-*px");
+	DUMP(p, crit_kstack, "%-*px");
+	DUMP(p, dbg_kstack, "%-*px");
 #endif
-	DUMP(p, __current, "px");
-	DUMP(p, kstack, "lx");
+	DUMP(p, __current, "%-*px");
+	DUMP(p, kstack, "%#-*lx");
 	printf(" kstack_base          = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
-	DUMP(p, stab_rr, "lx");
-	DUMP(p, saved_r1, "lx");
-	DUMP(p, trap_save, "x");
-	DUMP(p, irq_soft_mask, "x");
-	DUMP(p, irq_happened, "x");
-	DUMP(p, io_sync, "x");
-	DUMP(p, irq_work_pending, "x");
-	DUMP(p, nap_state_lost, "x");
-	DUMP(p, sprg_vdso, "llx");
+	DUMP(p, stab_rr, "%#-*lx");
+	DUMP(p, saved_r1, "%#-*lx");
+	DUMP(p, trap_save, "%#-*x");
+	DUMP(p, irq_soft_mask, "%#-*x");
+	DUMP(p, irq_happened, "%#-*x");
+	DUMP(p, io_sync, "%#-*x");
+	DUMP(p, irq_work_pending, "%#-*x");
+	DUMP(p, nap_state_lost, "%#-*x");
+	DUMP(p, sprg_vdso, "%#-*llx");
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	DUMP(p, tm_scratch, "llx");
+	DUMP(p, tm_scratch, "%#-*llx");
 #endif
 
 #ifdef CONFIG_PPC_POWERNV
-	DUMP(p, core_idle_state_ptr, "px");
-	DUMP(p, thread_idle_state, "x");
-	DUMP(p, thread_mask, "x");
-	DUMP(p, subcore_sibling_mask, "x");
+	DUMP(p, core_idle_state_ptr, "%-*px");
+	DUMP(p, thread_idle_state, "%#-*x");
+	DUMP(p, thread_mask, "%#-*x");
+	DUMP(p, subcore_sibling_mask, "%#-*x");
 #endif
 
-	DUMP(p, accounting.utime, "llx");
-	DUMP(p, accounting.stime, "llx");
-	DUMP(p, accounting.utime_scaled, "llx");
-	DUMP(p, accounting.starttime, "llx");
-	DUMP(p, accounting.starttime_user, "llx");
-	DUMP(p, accounting.startspurr, "llx");
-	DUMP(p, accounting.utime_sspurr, "llx");
-	DUMP(p, accounting.steal_time, "llx");
+	DUMP(p, accounting.utime, "%#-*llx");
+	DUMP(p, accounting.stime, "%#-*llx");
+	DUMP(p, accounting.utime_scaled, "%#-*llx");
+	DUMP(p, accounting.starttime, "%#-*llx");
+	DUMP(p, accounting.starttime_user, "%#-*llx");
+	DUMP(p, accounting.startspurr, "%#-*llx");
+	DUMP(p, accounting.utime_sspurr, "%#-*llx");
+	DUMP(p, accounting.steal_time, "%#-*llx");
 #undef DUMP
 
 	catch_memory_errors = 0;

From e70d8f55268ba95f00c61857df2bab638365f10f Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Sun, 25 Mar 2018 11:06:47 +0200
Subject: [PATCH 104/251] powerpc/xmon: Add __printf annotation to
 xmon_printf()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows the compiler to verify the format strings vs the types of
the arguments.

Update the other prototype declarations in asm/xmon.h.

Silence warnings (triggered at W=1) by adding relevant __printf
attribute. Move #define at bottom of the file to prevent conflict with
gcc attribute.

Solves the original warning:

  arch/powerpc/xmon/nonstdio.c:178:2: error: function might be
  possible candidate for ‘gnu_printf’ format attribute

In turn this uncovered many formatting errors in xmon.c, all fixed in
this patch.

Signed-off-by: Mathieu Malaterre <malat@debian.org>
[mpe: Always use px not p, fixup the 44x specific code, tweak change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/xmon.h |   2 +-
 arch/powerpc/xmon/nonstdio.h    |   8 +--
 arch/powerpc/xmon/spu-dis.c     |  18 ++---
 arch/powerpc/xmon/xmon.c        | 122 ++++++++++++++++----------------
 4 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
index eb42a0c6e1d9d..30ff69bd8f436 100644
--- a/arch/powerpc/include/asm/xmon.h
+++ b/arch/powerpc/include/asm/xmon.h
@@ -29,7 +29,7 @@ static inline void xmon_register_spus(struct list_head *list) { };
 extern int cpus_are_in_xmon(void);
 #endif
 
-extern void xmon_printf(const char *format, ...);
+extern __printf(1, 2) void xmon_printf(const char *format, ...);
 
 #endif /* __KERNEL __ */
 #endif /* __ASM_POWERPC_XMON_H */
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
index 2202ec61972c4..e8deac6c84e29 100644
--- a/arch/powerpc/xmon/nonstdio.h
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -1,13 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #define EOF	(-1)
 
-#define printf	xmon_printf
-#define putchar	xmon_putchar
-
 extern void xmon_set_pagination_lpp(unsigned long lpp);
 extern void xmon_start_pagination(void);
 extern void xmon_end_pagination(void);
 extern int xmon_putchar(int c);
 extern void xmon_puts(const char *);
 extern char *xmon_gets(char *, int);
-extern void xmon_printf(const char *, ...);
+extern __printf(1, 2) void xmon_printf(const char *fmt, ...);
+
+#define printf	xmon_printf
+#define putchar	xmon_putchar
diff --git a/arch/powerpc/xmon/spu-dis.c b/arch/powerpc/xmon/spu-dis.c
index e5f89837c82e0..4cbc7da88524a 100644
--- a/arch/powerpc/xmon/spu-dis.c
+++ b/arch/powerpc/xmon/spu-dis.c
@@ -102,7 +102,7 @@ print_insn_spu (unsigned long insn, unsigned long memaddr)
 
   if (index == 0)
     {
-      printf(".long 0x%x", insn);
+      printf(".long 0x%lx", insn);
     }
   else
     {
@@ -134,27 +134,27 @@ print_insn_spu (unsigned long insn, unsigned long memaddr)
 	  switch (arg)
 	    {
 	    case A_T:
-	      printf("$%d",
+	      printf("$%lu",
 				     DECODE_INSN_RT (insn));
 	      break;
 	    case A_A:
-	      printf("$%d",
+	      printf("$%lu",
 				     DECODE_INSN_RA (insn));
 	      break;
 	    case A_B:
-	      printf("$%d",
+	      printf("$%lu",
 				     DECODE_INSN_RB (insn));
 	      break;
 	    case A_C:
-	      printf("$%d",
+	      printf("$%lu",
 				     DECODE_INSN_RC (insn));
 	      break;
 	    case A_S:
-	      printf("$sp%d",
+	      printf("$sp%lu",
 				     DECODE_INSN_RA (insn));
 	      break;
 	    case A_H:
-	      printf("$ch%d",
+	      printf("$ch%lu",
 				     DECODE_INSN_RA (insn));
 	      break;
 	    case A_P:
@@ -162,11 +162,11 @@ print_insn_spu (unsigned long insn, unsigned long memaddr)
 	      printf("(");
 	      break;
 	    case A_U7A:
-	      printf("%d",
+	      printf("%lu",
 				     173 - DECODE_INSN_U8 (insn));
 	      break;
 	    case A_U7B:
-	      printf("%d",
+	      printf("%lu",
 				     155 - DECODE_INSN_U8 (insn));
 	      break;
 	    case A_S3:
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a3d597c8cfa22..152eb185bfcf0 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -515,7 +515,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		get_output_lock();
 		excprint(regs);
 		if (bp) {
-			printf("cpu 0x%x stopped at breakpoint 0x%lx (",
+			printf("cpu 0x%x stopped at breakpoint 0x%tx (",
 			       cpu, BP_NUM(bp));
 			xmon_print_symbol(regs->nip, " ", ")\n");
 		}
@@ -622,7 +622,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		excprint(regs);
 		bp = at_breakpoint(regs->nip);
 		if (bp) {
-			printf("Stopped at breakpoint %lx (", BP_NUM(bp));
+			printf("Stopped at breakpoint %tx (", BP_NUM(bp));
 			xmon_print_symbol(regs->nip, " ", ")\n");
 		}
 		if (unrecoverable_excp(regs))
@@ -1171,7 +1171,7 @@ static int cpu_cmd(void)
 	}
 	/* try to switch to cpu specified */
 	if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
-		printf("cpu 0x%x isn't in xmon\n", cpu);
+		printf("cpu 0x%lx isn't in xmon\n", cpu);
 		return 0;
 	}
 	xmon_taken = 0;
@@ -1185,7 +1185,7 @@ static int cpu_cmd(void)
 			/* take control back */
 			mb();
 			xmon_owner = smp_processor_id();
-			printf("cpu 0x%x didn't take control\n", cpu);
+			printf("cpu 0x%lx didn't take control\n", cpu);
 			return 0;
 		}
 		barrier();
@@ -1375,7 +1375,7 @@ bpt_cmds(void)
 			}
 		}
 
-		printf("Cleared breakpoint %lx (", BP_NUM(bp));
+		printf("Cleared breakpoint %tx (", BP_NUM(bp));
 		xmon_print_symbol(bp->address, " ", ")\n");
 		bp->enabled = 0;
 		break;
@@ -1402,7 +1402,7 @@ bpt_cmds(void)
 			for (bp = bpts; bp < &bpts[NBPTS]; ++bp) {
 				if (!bp->enabled)
 					continue;
-				printf("%2x %s   ", BP_NUM(bp),
+				printf("%tx %s   ", BP_NUM(bp),
 				    (bp->enabled & BP_CIABR) ? "inst": "trap");
 				xmon_print_symbol(bp->address, "  ", "\n");
 			}
@@ -1619,11 +1619,11 @@ static void excprint(struct pt_regs *fp)
 #endif /* CONFIG_SMP */
 
 	trap = TRAP(fp);
-	printf("Vector: %lx %s at [%lx]\n", fp->trap, getvecname(trap), fp);
+	printf("Vector: %lx %s at [%px]\n", fp->trap, getvecname(trap), fp);
 	printf("    pc: ");
 	xmon_print_symbol(fp->nip, ": ", "\n");
 
-	printf("    lr: ", fp->link);
+	printf("    lr: ");
 	xmon_print_symbol(fp->link, ": ", "\n");
 
 	printf("    sp: %lx\n", fp->gpr[1]);
@@ -1635,13 +1635,13 @@ static void excprint(struct pt_regs *fp)
 			printf(" dsisr: %lx\n", fp->dsisr);
 	}
 
-	printf("  current = 0x%lx\n", current);
+	printf("  current = 0x%px\n", current);
 #ifdef CONFIG_PPC64
-	printf("  paca    = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n",
+	printf("  paca    = 0x%px\t softe: %d\t irq_happened: 0x%02x\n",
 	       local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
 #endif
 	if (current) {
-		printf("    pid   = %ld, comm = %s\n",
+		printf("    pid   = %d, comm = %s\n",
 		       current->pid, current->comm);
 	}
 
@@ -1677,16 +1677,16 @@ static void prregs(struct pt_regs *fp)
 #ifdef CONFIG_PPC64
 	if (FULL_REGS(fp)) {
 		for (n = 0; n < 16; ++n)
-			printf("R%.2ld = "REG"   R%.2ld = "REG"\n",
+			printf("R%.2d = "REG"   R%.2d = "REG"\n",
 			       n, fp->gpr[n], n+16, fp->gpr[n+16]);
 	} else {
 		for (n = 0; n < 7; ++n)
-			printf("R%.2ld = "REG"   R%.2ld = "REG"\n",
+			printf("R%.2d = "REG"   R%.2d = "REG"\n",
 			       n, fp->gpr[n], n+7, fp->gpr[n+7]);
 	}
 #else
 	for (n = 0; n < 32; ++n) {
-		printf("R%.2d = %.8x%s", n, fp->gpr[n],
+		printf("R%.2d = %.8lx%s", n, fp->gpr[n],
 		       (n & 3) == 3? "\n": "   ");
 		if (n == 12 && !FULL_REGS(fp)) {
 			printf("\n");
@@ -1790,9 +1790,9 @@ static void dump_206_sprs(void)
 
 	/* Actually some of these pre-date 2.06, but whatevs */
 
-	printf("srr0   = %.16lx  srr1  = %.16lx dsisr  = %.8x\n",
+	printf("srr0   = %.16lx  srr1  = %.16lx dsisr  = %.8lx\n",
 		mfspr(SPRN_SRR0), mfspr(SPRN_SRR1), mfspr(SPRN_DSISR));
-	printf("dscr   = %.16lx  ppr   = %.16lx pir    = %.8x\n",
+	printf("dscr   = %.16lx  ppr   = %.16lx pir    = %.8lx\n",
 		mfspr(SPRN_DSCR), mfspr(SPRN_PPR), mfspr(SPRN_PIR));
 	printf("amr    = %.16lx  uamor = %.16lx\n",
 		mfspr(SPRN_AMR), mfspr(SPRN_UAMOR));
@@ -1800,11 +1800,11 @@ static void dump_206_sprs(void)
 	if (!(mfmsr() & MSR_HV))
 		return;
 
-	printf("sdr1   = %.16lx  hdar  = %.16lx hdsisr = %.8x\n",
+	printf("sdr1   = %.16lx  hdar  = %.16lx hdsisr = %.8lx\n",
 		mfspr(SPRN_SDR1), mfspr(SPRN_HDAR), mfspr(SPRN_HDSISR));
 	printf("hsrr0  = %.16lx hsrr1  = %.16lx hdec   = %.16lx\n",
 		mfspr(SPRN_HSRR0), mfspr(SPRN_HSRR1), mfspr(SPRN_HDEC));
-	printf("lpcr   = %.16lx  pcr   = %.16lx lpidr  = %.8x\n",
+	printf("lpcr   = %.16lx  pcr   = %.16lx lpidr  = %.8lx\n",
 		mfspr(SPRN_LPCR), mfspr(SPRN_PCR), mfspr(SPRN_LPID));
 	printf("hsprg0 = %.16lx hsprg1 = %.16lx amor   = %.16lx\n",
 		mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1), mfspr(SPRN_AMOR));
@@ -1821,10 +1821,10 @@ static void dump_207_sprs(void)
 	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
 		return;
 
-	printf("dpdes  = %.16lx  tir   = %.16lx cir    = %.8x\n",
+	printf("dpdes  = %.16lx  tir   = %.16lx cir    = %.8lx\n",
 		mfspr(SPRN_DPDES), mfspr(SPRN_TIR), mfspr(SPRN_CIR));
 
-	printf("fscr   = %.16lx  tar   = %.16lx pspb   = %.8x\n",
+	printf("fscr   = %.16lx  tar   = %.16lx pspb   = %.8lx\n",
 		mfspr(SPRN_FSCR), mfspr(SPRN_TAR), mfspr(SPRN_PSPB));
 
 	msr = mfmsr();
@@ -1837,12 +1837,12 @@ static void dump_207_sprs(void)
 
 	printf("mmcr0  = %.16lx  mmcr1 = %.16lx mmcr2  = %.16lx\n",
 		mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCR2));
-	printf("pmc1   = %.8x pmc2 = %.8x  pmc3 = %.8x  pmc4   = %.8x\n",
+	printf("pmc1   = %.8lx pmc2 = %.8lx  pmc3 = %.8lx  pmc4   = %.8lx\n",
 		mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
 		mfspr(SPRN_PMC3), mfspr(SPRN_PMC4));
-	printf("mmcra  = %.16lx   siar = %.16lx pmc5   = %.8x\n",
+	printf("mmcra  = %.16lx   siar = %.16lx pmc5   = %.8lx\n",
 		mfspr(SPRN_MMCRA), mfspr(SPRN_SIAR), mfspr(SPRN_PMC5));
-	printf("sdar   = %.16lx   sier = %.16lx pmc6   = %.8x\n",
+	printf("sdar   = %.16lx   sier = %.16lx pmc6   = %.8lx\n",
 		mfspr(SPRN_SDAR), mfspr(SPRN_SIER), mfspr(SPRN_PMC6));
 	printf("ebbhr  = %.16lx  ebbrr = %.16lx bescr  = %.16lx\n",
 		mfspr(SPRN_EBBHR), mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
@@ -2356,9 +2356,9 @@ static void dump_one_paca(int cpu)
 
 	DUMP(p, lock_token, "%#-*x");
 	DUMP(p, paca_index, "%#-*x");
-	DUMP(p, kernel_toc, "%#-*lx");
-	DUMP(p, kernelbase, "%#-*lx");
-	DUMP(p, kernel_msr, "%#-*lx");
+	DUMP(p, kernel_toc, "%#-*llx");
+	DUMP(p, kernelbase, "%#-*llx");
+	DUMP(p, kernel_msr, "%#-*llx");
 	DUMP(p, emergency_sp, "%-*px");
 #ifdef CONFIG_PPC_BOOK3S_64
 	DUMP(p, nmi_emergency_sp, "%-*px");
@@ -2367,7 +2367,7 @@ static void dump_one_paca(int cpu)
 	DUMP(p, in_mce, "%#-*x");
 	DUMP(p, hmi_event_available, "%#-*x");
 #endif
-	DUMP(p, data_offset, "%#-*lx");
+	DUMP(p, data_offset, "%#-*llx");
 	DUMP(p, hw_cpu_id, "%#-*x");
 	DUMP(p, cpu_start, "%#-*x");
 	DUMP(p, kexec_state, "%#-*x");
@@ -2382,14 +2382,14 @@ static void dump_one_paca(int cpu)
 		vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
 
 		if (esid || vsid) {
-			printf(" slb_shadow[%d]:       = 0x%016lx 0x%016lx\n",
+			printf(" slb_shadow[%d]:       = 0x%016llx 0x%016llx\n",
 				i, esid, vsid);
 		}
 	}
 	DUMP(p, vmalloc_sllp, "%#-*x");
 	DUMP(p, slb_cache_ptr, "%#-*x");
 	for (i = 0; i < SLB_CACHE_ENTRIES; i++)
-		printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]);
+		printf(" slb_cache[%d]:        = 0x%016x\n", i, p->slb_cache[i]);
 
 	DUMP(p, rfi_flush_fallback_area, "%-*px");
 #endif
@@ -2403,10 +2403,10 @@ static void dump_one_paca(int cpu)
 	DUMP(p, dbg_kstack, "%-*px");
 #endif
 	DUMP(p, __current, "%-*px");
-	DUMP(p, kstack, "%#-*lx");
-	printf(" kstack_base          = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
-	DUMP(p, stab_rr, "%#-*lx");
-	DUMP(p, saved_r1, "%#-*lx");
+	DUMP(p, kstack, "%#-*llx");
+	printf(" kstack_base          = 0x%016llx\n", p->kstack & ~(THREAD_SIZE - 1));
+	DUMP(p, stab_rr, "%#-*llx");
+	DUMP(p, saved_r1, "%#-*llx");
 	DUMP(p, trap_save, "%#-*x");
 	DUMP(p, irq_soft_mask, "%#-*x");
 	DUMP(p, irq_happened, "%#-*x");
@@ -2426,14 +2426,14 @@ static void dump_one_paca(int cpu)
 	DUMP(p, subcore_sibling_mask, "%#-*x");
 #endif
 
-	DUMP(p, accounting.utime, "%#-*llx");
-	DUMP(p, accounting.stime, "%#-*llx");
-	DUMP(p, accounting.utime_scaled, "%#-*llx");
-	DUMP(p, accounting.starttime, "%#-*llx");
-	DUMP(p, accounting.starttime_user, "%#-*llx");
-	DUMP(p, accounting.startspurr, "%#-*llx");
-	DUMP(p, accounting.utime_sspurr, "%#-*llx");
-	DUMP(p, accounting.steal_time, "%#-*llx");
+	DUMP(p, accounting.utime, "%#-*lx");
+	DUMP(p, accounting.stime, "%#-*lx");
+	DUMP(p, accounting.utime_scaled, "%#-*lx");
+	DUMP(p, accounting.starttime, "%#-*lx");
+	DUMP(p, accounting.starttime_user, "%#-*lx");
+	DUMP(p, accounting.startspurr, "%#-*lx");
+	DUMP(p, accounting.utime_sspurr, "%#-*lx");
+	DUMP(p, accounting.steal_time, "%#-*lx");
 #undef DUMP
 
 	catch_memory_errors = 0;
@@ -2579,7 +2579,7 @@ static void dump_by_size(unsigned long addr, long count, int size)
 			default: val = 0;
 			}
 
-			printf("%0*lx", size * 2, val);
+			printf("%0*llx", size * 2, val);
 		}
 		printf("\n");
 	}
@@ -2743,7 +2743,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
 		dotted = 0;
 		last_inst = inst;
 		if (praddr)
-			printf(REG"  %.8x", adr, inst);
+			printf(REG"  %.8lx", adr, inst);
 		printf("\t");
 		dump_func(inst, adr);
 		printf("\n");
@@ -2875,7 +2875,7 @@ memdiffs(unsigned char *p1, unsigned char *p2, unsigned nb, unsigned maxpr)
 	for( n = nb; n > 0; --n )
 		if( *p1++ != *p2++ )
 			if( ++prt <= maxpr )
-				printf("%.16x %.2x # %.16x %.2x\n", p1 - 1,
+				printf("%px %.2x # %px %.2x\n", p1 - 1,
 					p1[-1], p2 - 1, p2[-1]);
 	if( prt > maxpr )
 		printf("Total of %d differences\n", prt);
@@ -2935,13 +2935,13 @@ memzcan(void)
 		if (ok && !ook) {
 			printf("%.8x .. ", a);
 		} else if (!ok && ook)
-			printf("%.8x\n", a - mskip);
+			printf("%.8lx\n", a - mskip);
 		ook = ok;
 		if (a + mskip < a)
 			break;
 	}
 	if (ook)
-		printf("%.8x\n", a - mskip);
+		printf("%.8lx\n", a - mskip);
 }
 
 static void show_task(struct task_struct *tsk)
@@ -3025,13 +3025,13 @@ static void show_pte(unsigned long addr)
 		return;
 	}
 
-	printf("pgd  @ 0x%016lx\n", pgdir);
+	printf("pgd  @ 0x%px\n", pgdir);
 
 	if (pgd_huge(*pgdp)) {
 		format_pte(pgdp, pgd_val(*pgdp));
 		return;
 	}
-	printf("pgdp @ 0x%016lx = 0x%016lx\n", pgdp, pgd_val(*pgdp));
+	printf("pgdp @ 0x%px = 0x%016lx\n", pgdp, pgd_val(*pgdp));
 
 	pudp = pud_offset(pgdp, addr);
 
@@ -3045,7 +3045,7 @@ static void show_pte(unsigned long addr)
 		return;
 	}
 
-	printf("pudp @ 0x%016lx = 0x%016lx\n", pudp, pud_val(*pudp));
+	printf("pudp @ 0x%px = 0x%016lx\n", pudp, pud_val(*pudp));
 
 	pmdp = pmd_offset(pudp, addr);
 
@@ -3058,7 +3058,7 @@ static void show_pte(unsigned long addr)
 		format_pte(pmdp, pmd_val(*pmdp));
 		return;
 	}
-	printf("pmdp @ 0x%016lx = 0x%016lx\n", pmdp, pmd_val(*pmdp));
+	printf("pmdp @ 0x%px = 0x%016lx\n", pmdp, pmd_val(*pmdp));
 
 	ptep = pte_offset_map(pmdp, addr);
 	if (pte_none(*ptep)) {
@@ -3457,9 +3457,9 @@ static void dump_tlb_44x(void)
 		asm volatile("tlbre  %0,%1,0" : "=r" (w0) : "r" (i));
 		asm volatile("tlbre  %0,%1,1" : "=r" (w1) : "r" (i));
 		asm volatile("tlbre  %0,%1,2" : "=r" (w2) : "r" (i));
-		printf("[%02x] %08x %08x %08x ", i, w0, w1, w2);
+		printf("[%02x] %08lx %08lx %08lx ", i, w0, w1, w2);
 		if (w0 & PPC44x_TLB_VALID) {
-			printf("V %08x -> %01x%08x %c%c%c%c%c",
+			printf("V %08lx -> %01lx%08lx %c%c%c%c%c",
 			       w0 & PPC44x_TLB_EPN_MASK,
 			       w1 & PPC44x_TLB_ERPN_MASK,
 			       w1 & PPC44x_TLB_RPN_MASK,
@@ -3883,19 +3883,19 @@ static void dump_spu_fields(struct spu *spu)
 	DUMP_FIELD(spu, "0x%lx", ls_size);
 	DUMP_FIELD(spu, "0x%x", node);
 	DUMP_FIELD(spu, "0x%lx", flags);
-	DUMP_FIELD(spu, "%d", class_0_pending);
-	DUMP_FIELD(spu, "0x%lx", class_0_dar);
-	DUMP_FIELD(spu, "0x%lx", class_1_dar);
-	DUMP_FIELD(spu, "0x%lx", class_1_dsisr);
-	DUMP_FIELD(spu, "0x%lx", irqs[0]);
-	DUMP_FIELD(spu, "0x%lx", irqs[1]);
-	DUMP_FIELD(spu, "0x%lx", irqs[2]);
+	DUMP_FIELD(spu, "%llu", class_0_pending);
+	DUMP_FIELD(spu, "0x%llx", class_0_dar);
+	DUMP_FIELD(spu, "0x%llx", class_1_dar);
+	DUMP_FIELD(spu, "0x%llx", class_1_dsisr);
+	DUMP_FIELD(spu, "0x%x", irqs[0]);
+	DUMP_FIELD(spu, "0x%x", irqs[1]);
+	DUMP_FIELD(spu, "0x%x", irqs[2]);
 	DUMP_FIELD(spu, "0x%x", slb_replace);
 	DUMP_FIELD(spu, "%d", pid);
 	DUMP_FIELD(spu, "0x%p", mm);
 	DUMP_FIELD(spu, "0x%p", ctx);
 	DUMP_FIELD(spu, "0x%p", rq);
-	DUMP_FIELD(spu, "0x%p", timestamp);
+	DUMP_FIELD(spu, "0x%llx", timestamp);
 	DUMP_FIELD(spu, "0x%lx", problem_phys);
 	DUMP_FIELD(spu, "0x%p", problem);
 	DUMP_VALUE("0x%x", problem->spu_runcntl_RW,
@@ -3926,7 +3926,7 @@ static void dump_spu_ls(unsigned long num, int subcmd)
 		__delay(200);
 	} else {
 		catch_memory_errors = 0;
-		printf("*** Error: accessing spu info for spu %d\n", num);
+		printf("*** Error: accessing spu info for spu %ld\n", num);
 		return;
 	}
 	catch_memory_errors = 0;

From 9ce53e27265e7bab728774fac785f66db97e206e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 21 May 2018 21:28:34 +1000
Subject: [PATCH 105/251] powerpc/xmon: Realign paca dump fields

We've added some fields with longer names since we originally wrote
this, so the fields are no longer lined up. Adjust the widths to make
it all look nice again, eg:

  0:mon> dp
  paca for cpu 0x0 @ c000000001fa0000:
   possible                  = yes
   ...
   slb_shadow            [0] = 0xc000000008000000 0x400ea1b217000500
   slb_shadow            [1] = 0xd000000008000001 0x400d43642f000510
   ...
   rfi_flush_fallback_area   = c0000000fff80000   (0xcc8)
   ...
   accounting.starttime_user = 0x51582f07         (0xae8)

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 152eb185bfcf0..56c83eab3d27a 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2346,12 +2346,12 @@ static void dump_one_paca(int cpu)
 
 	printf("paca for cpu 0x%x @ %px:\n", cpu, p);
 
-	printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");
-	printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
-	printf(" %-*s = %s\n", 20, "online", cpu_online(cpu) ? "yes" : "no");
+	printf(" %-*s = %s\n", 25, "possible", cpu_possible(cpu) ? "yes" : "no");
+	printf(" %-*s = %s\n", 25, "present", cpu_present(cpu) ? "yes" : "no");
+	printf(" %-*s = %s\n", 25, "online", cpu_online(cpu) ? "yes" : "no");
 
 #define DUMP(paca, name, format)				\
-	printf(" %-*s = "format"\t(0x%lx)\n", 20, #name, 18, paca->name, \
+	printf(" %-*s = "format"\t(0x%lx)\n", 25, #name, 18, paca->name, \
 		offsetof(struct paca_struct, name));
 
 	DUMP(p, lock_token, "%#-*x");
@@ -2382,14 +2382,15 @@ static void dump_one_paca(int cpu)
 		vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
 
 		if (esid || vsid) {
-			printf(" slb_shadow[%d]:       = 0x%016llx 0x%016llx\n",
-				i, esid, vsid);
+			printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
+			       22, "slb_shadow", i, esid, vsid);
 		}
 	}
 	DUMP(p, vmalloc_sllp, "%#-*x");
 	DUMP(p, slb_cache_ptr, "%#-*x");
 	for (i = 0; i < SLB_CACHE_ENTRIES; i++)
-		printf(" slb_cache[%d]:        = 0x%016x\n", i, p->slb_cache[i]);
+		printf(" %-*s[%d] = 0x%016x\n",
+		       22, "slb_cache", i, p->slb_cache[i]);
 
 	DUMP(p, rfi_flush_fallback_area, "%-*px");
 #endif
@@ -2404,7 +2405,7 @@ static void dump_one_paca(int cpu)
 #endif
 	DUMP(p, __current, "%-*px");
 	DUMP(p, kstack, "%#-*llx");
-	printf(" kstack_base          = 0x%016llx\n", p->kstack & ~(THREAD_SIZE - 1));
+	printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
 	DUMP(p, stab_rr, "%#-*llx");
 	DUMP(p, saved_r1, "%#-*llx");
 	DUMP(p, trap_save, "%#-*x");

From 2e0986d761324376021c880ddbf00c6d04ef1841 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 21 May 2018 19:47:20 +1000
Subject: [PATCH 106/251] powerpc/xmon: Update paca fields dumped in xmon

The set of paca fields we dump in xmon has gotten somewhat out of
date. Update to add some recently added fields.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 56c83eab3d27a..c2e9270728c75 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2425,6 +2425,16 @@ static void dump_one_paca(int cpu)
 	DUMP(p, thread_idle_state, "%#-*x");
 	DUMP(p, thread_mask, "%#-*x");
 	DUMP(p, subcore_sibling_mask, "%#-*x");
+	DUMP(p, thread_sibling_pacas, "%-*px");
+	DUMP(p, requested_psscr, "%#-*llx");
+	DUMP(p, stop_sprs.pid, "%#-*llx");
+	DUMP(p, stop_sprs.ldbar, "%#-*llx");
+	DUMP(p, stop_sprs.fscr, "%#-*llx");
+	DUMP(p, stop_sprs.hfscr, "%#-*llx");
+	DUMP(p, stop_sprs.mmcr1, "%#-*llx");
+	DUMP(p, stop_sprs.mmcr2, "%#-*llx");
+	DUMP(p, stop_sprs.mmcra, "%#-*llx");
+	DUMP(p, dont_stop.counter, "%#-*x");
 #endif
 
 	DUMP(p, accounting.utime, "%#-*lx");

From eae5f709a4d738c52b6ab636981755d76349ea9e Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Fri, 6 Apr 2018 22:12:19 +0200
Subject: [PATCH 107/251] powerpc: Add __printf verification to prom_printf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__printf is useful to verify format and arguments. Fix arg mismatch
reported by gcc, remove the following warnings (with W=1):

  arch/powerpc/kernel/prom_init.c:1467:31: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1471:31: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1504:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1505:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1506:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1507:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1508:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1509:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:1975:39: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 2 has type ‘unsigned int’
  arch/powerpc/kernel/prom_init.c:1986:27: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:2567:38: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:2567:46: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:2569:38: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
  arch/powerpc/kernel/prom_init.c:2569:46: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘long unsigned int’

The patch also include arg mismatch fix for case with #define DEBUG_PROM
(warning not listed here).

This patch fix also the following warnings revealed by checkpatch:

  WARNING: Prefer using '"%s...", __func__' to using 'alloc_up', this function's name, in a string
  #101: FILE: arch/powerpc/kernel/prom_init.c:1235:
  + prom_debug("alloc_up(%lx, %lx)\n", size, align);

and

  WARNING: Prefer using '"%s...", __func__' to using 'alloc_down', this function's name, in a string
  #138: FILE: arch/powerpc/kernel/prom_init.c:1278:
  + prom_debug("alloc_down(%lx, %lx, %s)\n", size, align,

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c | 114 ++++++++++++++++----------------
 1 file changed, 58 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f9d6befb55a6e..67f9c157bcc08 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -334,6 +334,7 @@ static void __init prom_print_dec(unsigned long val)
 	call_prom("write", 3, 1, prom.stdout, buf+i, size);
 }
 
+__printf(1, 2)
 static void __init prom_printf(const char *format, ...)
 {
 	const char *p, *q, *s;
@@ -1160,7 +1161,7 @@ static void __init prom_send_capabilities(void)
 		 */
 
 		cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
-		prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+		prom_printf("Max number of cores passed to firmware: %u (NR_CPUS = %d)\n",
 			    cores, NR_CPUS);
 
 		ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores);
@@ -1242,7 +1243,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
 
 	if (align)
 		base = _ALIGN_UP(base, align);
-	prom_debug("alloc_up(%x, %x)\n", size, align);
+	prom_debug("%s(%lx, %lx)\n", __func__, size, align);
 	if (ram_top == 0)
 		prom_panic("alloc_up() called with mem not initialized\n");
 
@@ -1253,7 +1254,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
 
 	for(; (base + size) <= alloc_top; 
 	    base = _ALIGN_UP(base + 0x100000, align)) {
-		prom_debug("    trying: 0x%x\n\r", base);
+		prom_debug("    trying: 0x%lx\n\r", base);
 		addr = (unsigned long)prom_claim(base, size, 0);
 		if (addr != PROM_ERROR && addr != 0)
 			break;
@@ -1265,12 +1266,12 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
 		return 0;
 	alloc_bottom = addr + size;
 
-	prom_debug(" -> %x\n", addr);
-	prom_debug("  alloc_bottom : %x\n", alloc_bottom);
-	prom_debug("  alloc_top    : %x\n", alloc_top);
-	prom_debug("  alloc_top_hi : %x\n", alloc_top_high);
-	prom_debug("  rmo_top      : %x\n", rmo_top);
-	prom_debug("  ram_top      : %x\n", ram_top);
+	prom_debug(" -> %lx\n", addr);
+	prom_debug("  alloc_bottom : %lx\n", alloc_bottom);
+	prom_debug("  alloc_top    : %lx\n", alloc_top);
+	prom_debug("  alloc_top_hi : %lx\n", alloc_top_high);
+	prom_debug("  rmo_top      : %lx\n", rmo_top);
+	prom_debug("  ram_top      : %lx\n", ram_top);
 
 	return addr;
 }
@@ -1285,7 +1286,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
 {
 	unsigned long base, addr = 0;
 
-	prom_debug("alloc_down(%x, %x, %s)\n", size, align,
+	prom_debug("%s(%lx, %lx, %s)\n", __func__, size, align,
 		   highmem ? "(high)" : "(low)");
 	if (ram_top == 0)
 		prom_panic("alloc_down() called with mem not initialized\n");
@@ -1313,7 +1314,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
 	base = _ALIGN_DOWN(alloc_top - size, align);
 	for (; base > alloc_bottom;
 	     base = _ALIGN_DOWN(base - 0x100000, align))  {
-		prom_debug("    trying: 0x%x\n\r", base);
+		prom_debug("    trying: 0x%lx\n\r", base);
 		addr = (unsigned long)prom_claim(base, size, 0);
 		if (addr != PROM_ERROR && addr != 0)
 			break;
@@ -1324,12 +1325,12 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
 	alloc_top = addr;
 
  bail:
-	prom_debug(" -> %x\n", addr);
-	prom_debug("  alloc_bottom : %x\n", alloc_bottom);
-	prom_debug("  alloc_top    : %x\n", alloc_top);
-	prom_debug("  alloc_top_hi : %x\n", alloc_top_high);
-	prom_debug("  rmo_top      : %x\n", rmo_top);
-	prom_debug("  ram_top      : %x\n", ram_top);
+	prom_debug(" -> %lx\n", addr);
+	prom_debug("  alloc_bottom : %lx\n", alloc_bottom);
+	prom_debug("  alloc_top    : %lx\n", alloc_top);
+	prom_debug("  alloc_top_hi : %lx\n", alloc_top_high);
+	prom_debug("  rmo_top      : %lx\n", rmo_top);
+	prom_debug("  ram_top      : %lx\n", ram_top);
 
 	return addr;
 }
@@ -1455,7 +1456,7 @@ static void __init prom_init_mem(void)
 
 			if (size == 0)
 				continue;
-			prom_debug("    %x %x\n", base, size);
+			prom_debug("    %lx %lx\n", base, size);
 			if (base == 0 && (of_platform & PLATFORM_LPAR))
 				rmo_top = size;
 			if ((base + size) > ram_top)
@@ -1475,12 +1476,12 @@ static void __init prom_init_mem(void)
 
 	if (prom_memory_limit) {
 		if (prom_memory_limit <= alloc_bottom) {
-			prom_printf("Ignoring mem=%x <= alloc_bottom.\n",
-				prom_memory_limit);
+			prom_printf("Ignoring mem=%lx <= alloc_bottom.\n",
+				    prom_memory_limit);
 			prom_memory_limit = 0;
 		} else if (prom_memory_limit >= ram_top) {
-			prom_printf("Ignoring mem=%x >= ram_top.\n",
-				prom_memory_limit);
+			prom_printf("Ignoring mem=%lx >= ram_top.\n",
+				    prom_memory_limit);
 			prom_memory_limit = 0;
 		} else {
 			ram_top = prom_memory_limit;
@@ -1512,12 +1513,13 @@ static void __init prom_init_mem(void)
 		alloc_bottom = PAGE_ALIGN(prom_initrd_end);
 
 	prom_printf("memory layout at init:\n");
-	prom_printf("  memory_limit : %x (16 MB aligned)\n", prom_memory_limit);
-	prom_printf("  alloc_bottom : %x\n", alloc_bottom);
-	prom_printf("  alloc_top    : %x\n", alloc_top);
-	prom_printf("  alloc_top_hi : %x\n", alloc_top_high);
-	prom_printf("  rmo_top      : %x\n", rmo_top);
-	prom_printf("  ram_top      : %x\n", ram_top);
+	prom_printf("  memory_limit : %lx (16 MB aligned)\n",
+		    prom_memory_limit);
+	prom_printf("  alloc_bottom : %lx\n", alloc_bottom);
+	prom_printf("  alloc_top    : %lx\n", alloc_top);
+	prom_printf("  alloc_top_hi : %lx\n", alloc_top_high);
+	prom_printf("  rmo_top      : %lx\n", rmo_top);
+	prom_printf("  ram_top      : %lx\n", ram_top);
 }
 
 static void __init prom_close_stdin(void)
@@ -1578,7 +1580,7 @@ static void __init prom_instantiate_opal(void)
 		return;
 	}
 
-	prom_printf("instantiating opal at 0x%x...", base);
+	prom_printf("instantiating opal at 0x%llx...", base);
 
 	if (call_prom_ret("call-method", 4, 3, rets,
 			  ADDR("load-opal-runtime"),
@@ -1594,10 +1596,10 @@ static void __init prom_instantiate_opal(void)
 
 	reserve_mem(base, size);
 
-	prom_debug("opal base     = 0x%x\n", base);
-	prom_debug("opal align    = 0x%x\n", align);
-	prom_debug("opal entry    = 0x%x\n", entry);
-	prom_debug("opal size     = 0x%x\n", (long)size);
+	prom_debug("opal base     = 0x%llx\n", base);
+	prom_debug("opal align    = 0x%llx\n", align);
+	prom_debug("opal entry    = 0x%llx\n", entry);
+	prom_debug("opal size     = 0x%llx\n", size);
 
 	prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
 		     &base, sizeof(base));
@@ -1674,7 +1676,7 @@ static void __init prom_instantiate_rtas(void)
 
 	prom_debug("rtas base     = 0x%x\n", base);
 	prom_debug("rtas entry    = 0x%x\n", entry);
-	prom_debug("rtas size     = 0x%x\n", (long)size);
+	prom_debug("rtas size     = 0x%x\n", size);
 
 	prom_debug("prom_instantiate_rtas: end...\n");
 }
@@ -1732,7 +1734,7 @@ static void __init prom_instantiate_sml(void)
 	if (base == 0)
 		prom_panic("Could not allocate memory for sml\n");
 
-	prom_printf("instantiating sml at 0x%x...", base);
+	prom_printf("instantiating sml at 0x%llx...", base);
 
 	memset((void *)base, 0, size);
 
@@ -1751,8 +1753,8 @@ static void __init prom_instantiate_sml(void)
 	prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size",
 		     &size, sizeof(size));
 
-	prom_debug("sml base     = 0x%x\n", base);
-	prom_debug("sml size     = 0x%x\n", (long)size);
+	prom_debug("sml base     = 0x%llx\n", base);
+	prom_debug("sml size     = 0x%x\n", size);
 
 	prom_debug("prom_instantiate_sml: end...\n");
 }
@@ -1845,7 +1847,7 @@ static void __init prom_initialize_tce_table(void)
 
 		prom_debug("TCE table: %s\n", path);
 		prom_debug("\tnode = 0x%x\n", node);
-		prom_debug("\tbase = 0x%x\n", base);
+		prom_debug("\tbase = 0x%llx\n", base);
 		prom_debug("\tsize = 0x%x\n", minsize);
 
 		/* Initialize the table to have a one-to-one mapping
@@ -1932,12 +1934,12 @@ static void __init prom_hold_cpus(void)
 	}
 
 	prom_debug("prom_hold_cpus: start...\n");
-	prom_debug("    1) spinloop       = 0x%x\n", (unsigned long)spinloop);
-	prom_debug("    1) *spinloop      = 0x%x\n", *spinloop);
-	prom_debug("    1) acknowledge    = 0x%x\n",
+	prom_debug("    1) spinloop       = 0x%lx\n", (unsigned long)spinloop);
+	prom_debug("    1) *spinloop      = 0x%lx\n", *spinloop);
+	prom_debug("    1) acknowledge    = 0x%lx\n",
 		   (unsigned long)acknowledge);
-	prom_debug("    1) *acknowledge   = 0x%x\n", *acknowledge);
-	prom_debug("    1) secondary_hold = 0x%x\n", secondary_hold);
+	prom_debug("    1) *acknowledge   = 0x%lx\n", *acknowledge);
+	prom_debug("    1) secondary_hold = 0x%lx\n", secondary_hold);
 
 	/* Set the common spinloop variable, so all of the secondary cpus
 	 * will block when they are awakened from their OF spinloop.
@@ -1965,7 +1967,7 @@ static void __init prom_hold_cpus(void)
 		prom_getprop(node, "reg", &reg, sizeof(reg));
 		cpu_no = be32_to_cpu(reg);
 
-		prom_debug("cpu hw idx   = %lu\n", cpu_no);
+		prom_debug("cpu hw idx   = %u\n", cpu_no);
 
 		/* Init the acknowledge var which will be reset by
 		 * the secondary cpu when it awakens from its OF
@@ -1975,7 +1977,7 @@ static void __init prom_hold_cpus(void)
 
 		if (cpu_no != prom.cpu) {
 			/* Primary Thread of non-boot cpu or any thread */
-			prom_printf("starting cpu hw idx %lu... ", cpu_no);
+			prom_printf("starting cpu hw idx %u... ", cpu_no);
 			call_prom("start-cpu", 3, 0, node,
 				  secondary_hold, cpu_no);
 
@@ -1986,11 +1988,11 @@ static void __init prom_hold_cpus(void)
 			if (*acknowledge == cpu_no)
 				prom_printf("done\n");
 			else
-				prom_printf("failed: %x\n", *acknowledge);
+				prom_printf("failed: %lx\n", *acknowledge);
 		}
 #ifdef CONFIG_SMP
 		else
-			prom_printf("boot cpu hw idx %lu\n", cpu_no);
+			prom_printf("boot cpu hw idx %u\n", cpu_no);
 #endif /* CONFIG_SMP */
 	}
 
@@ -2268,7 +2270,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
 	while ((*mem_start + needed) > *mem_end) {
 		unsigned long room, chunk;
 
-		prom_debug("Chunk exhausted, claiming more at %x...\n",
+		prom_debug("Chunk exhausted, claiming more at %lx...\n",
 			   alloc_bottom);
 		room = alloc_top - alloc_bottom;
 		if (room > DEVTREE_CHUNK_SIZE)
@@ -2494,7 +2496,7 @@ static void __init flatten_device_tree(void)
 	room = alloc_top - alloc_bottom - 0x4000;
 	if (room > DEVTREE_CHUNK_SIZE)
 		room = DEVTREE_CHUNK_SIZE;
-	prom_debug("starting device tree allocs at %x\n", alloc_bottom);
+	prom_debug("starting device tree allocs at %lx\n", alloc_bottom);
 
 	/* Now try to claim that */
 	mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
@@ -2557,7 +2559,7 @@ static void __init flatten_device_tree(void)
 		int i;
 		prom_printf("reserved memory map:\n");
 		for (i = 0; i < mem_reserve_cnt; i++)
-			prom_printf("  %x - %x\n",
+			prom_printf("  %llx - %llx\n",
 				    be64_to_cpu(mem_reserve_map[i].base),
 				    be64_to_cpu(mem_reserve_map[i].size));
 	}
@@ -2567,9 +2569,9 @@ static void __init flatten_device_tree(void)
 	 */
 	mem_reserve_cnt = MEM_RESERVE_MAP_SIZE;
 
-	prom_printf("Device tree strings 0x%x -> 0x%x\n",
+	prom_printf("Device tree strings 0x%lx -> 0x%lx\n",
 		    dt_string_start, dt_string_end);
-	prom_printf("Device tree struct  0x%x -> 0x%x\n",
+	prom_printf("Device tree struct  0x%lx -> 0x%lx\n",
 		    dt_struct_start, dt_struct_end);
 }
 
@@ -3001,7 +3003,7 @@ static void __init prom_find_boot_cpu(void)
 	prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval));
 	prom.cpu = be32_to_cpu(rval);
 
-	prom_debug("Booting CPU hw index = %lu\n", prom.cpu);
+	prom_debug("Booting CPU hw index = %d\n", prom.cpu);
 }
 
 static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
@@ -3023,8 +3025,8 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
 		reserve_mem(prom_initrd_start,
 			    prom_initrd_end - prom_initrd_start);
 
-		prom_debug("initrd_start=0x%x\n", prom_initrd_start);
-		prom_debug("initrd_end=0x%x\n", prom_initrd_end);
+		prom_debug("initrd_start=0x%lx\n", prom_initrd_start);
+		prom_debug("initrd_end=0x%lx\n", prom_initrd_end);
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 }
@@ -3277,7 +3279,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	/* Don't print anything after quiesce under OPAL, it crashes OFW */
 	if (of_platform != PLATFORM_OPAL) {
 		prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
-		prom_debug("->dt_header_start=0x%x\n", hdr);
+		prom_debug("->dt_header_start=0x%lx\n", hdr);
 	}
 
 #ifdef CONFIG_PPC32

From 7cf76a68f1bcf69214da2812e8f615e36e16e60e Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 28 Mar 2018 20:55:25 +0200
Subject: [PATCH 108/251] powerpc/altivec: Add missing prototypes for altivec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some functions prototypes were missing for the non-altivec code. Add the
missing prototypes in a new header file, fix warnings treated as errors
with W=1:

  arch/powerpc/lib/xor_vmx_glue.c:18:6: error: no previous prototype for ‘xor_altivec_2’ [-Werror=missing-prototypes]
  arch/powerpc/lib/xor_vmx_glue.c:29:6: error: no previous prototype for ‘xor_altivec_3’ [-Werror=missing-prototypes]
  arch/powerpc/lib/xor_vmx_glue.c:40:6: error: no previous prototype for ‘xor_altivec_4’ [-Werror=missing-prototypes]
  arch/powerpc/lib/xor_vmx_glue.c:52:6: error: no previous prototype for ‘xor_altivec_5’ [-Werror=missing-prototypes]

The prototypes were already present in <asm/xor.h> but this header file is
meant to be included after <include/linux/raid/xor.h>. Trying to re-use
<asm/xor.h> directly would lead to warnings such as:

  arch/powerpc/include/asm/xor.h:39:15: error: variable ‘xor_block_altivec’ has initializer but incomplete type

Trying to re-use <asm/xor.h> after <include/linux/raid/xor.h> in
xor_vmx_glue.c would in turn trigger the following warnings:

  include/asm-generic/xor.h:688:34: error: ‘xor_block_32regs’ defined but not used [-Werror=unused-variable]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/xor.h         | 12 +-----------
 arch/powerpc/include/asm/xor_altivec.h | 19 +++++++++++++++++++
 arch/powerpc/lib/xor_vmx_glue.c        |  1 +
 3 files changed, 21 insertions(+), 11 deletions(-)
 create mode 100644 arch/powerpc/include/asm/xor_altivec.h

diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index a36c2069d8ed5..7d6dc503349d0 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -24,17 +24,7 @@
 
 #include <asm/cputable.h>
 #include <asm/cpu_has_feature.h>
-
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in, unsigned long *v5_in);
+#include <asm/xor_altivec.h>
 
 static struct xor_block_template xor_block_altivec = {
 	.name = "altivec",
diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h
new file mode 100644
index 0000000000000..6ca923510b597
--- /dev/null
+++ b/arch/powerpc/include/asm/xor_altivec.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_XOR_ALTIVEC_H
+#define _ASM_POWERPC_XOR_ALTIVEC_H
+
+#ifdef CONFIG_ALTIVEC
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in);
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in);
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in);
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in, unsigned long *v5_in);
+
+#endif
+#endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
index 6521fe5e8cef6..dab2b6bfcf36f 100644
--- a/arch/powerpc/lib/xor_vmx_glue.c
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <asm/switch_to.h>
+#include <asm/xor_altivec.h>
 #include "xor_vmx.h"
 
 void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,

From d8731527acee53b4d46d59ff0b5fc36931ad0451 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Fri, 13 Apr 2018 20:41:43 +0200
Subject: [PATCH 109/251] powerpc/sparse: Fix plain integer as NULL pointer
 warning

Trivial fix to remove the following sparse warnings:

  arch/powerpc/kernel/module_32.c:112:74: warning: Using plain integer as NULL pointer
  arch/powerpc/kernel/module_32.c:117:74: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:1155:28: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:1230:20: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:1385:36: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:1752:23: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:2084:19: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:2110:32: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:2167:19: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:2183:19: warning: Using plain integer as NULL pointer
  drivers/macintosh/via-pmu.c:277:20: warning: Using plain integer as NULL pointer
  arch/powerpc/platforms/powermac/setup.c:155:67: warning: Using plain integer as NULL pointer
  arch/powerpc/platforms/powermac/setup.c:247:27: warning: Using plain integer as NULL pointer
  arch/powerpc/platforms/powermac/setup.c:249:27: warning: Using plain integer as NULL pointer
  arch/powerpc/platforms/powermac/setup.c:252:37: warning: Using plain integer as NULL pointer
  arch/powerpc/mm/tlb_hash32.c:127:21: warning: Using plain integer as NULL pointer
  arch/powerpc/mm/tlb_hash32.c:148:21: warning: Using plain integer as NULL pointer
  arch/powerpc/mm/tlb_hash32.c:44:21: warning: Using plain integer as NULL pointer
  arch/powerpc/mm/tlb_hash32.c:57:21: warning: Using plain integer as NULL pointer
  arch/powerpc/mm/tlb_hash32.c:87:21: warning: Using plain integer as NULL pointer
  arch/powerpc/kernel/btext.c:160:31: warning: Using plain integer as NULL pointer
  arch/powerpc/kernel/btext.c:167:22: warning: Using plain integer as NULL pointer
  arch/powerpc/kernel/btext.c:274:21: warning: Using plain integer as NULL pointer
  arch/powerpc/kernel/btext.c:285:31: warning: Using plain integer as NULL pointer
  arch/powerpc/include/asm/hugetlb.h:204:16: warning: Using plain integer as NULL pointer
  arch/powerpc/mm/ppc_mmu_32.c:170:21: warning: Using plain integer as NULL pointer
  arch/powerpc/platforms/powermac/pci.c:1227:23: warning: Using plain integer as NULL pointer
  arch/powerpc/platforms/powermac/pci.c:65:24: warning: Using plain integer as NULL pointer

Also use `--fix` command line option from `script/checkpatch --strict` to
remove the following:

  CHECK: Comparison to NULL could be written "!dispDeviceBase"
  #72: FILE: arch/powerpc/kernel/btext.c:160:
  +	if (dispDeviceBase == NULL)

  CHECK: Comparison to NULL could be written "!vbase"
  #80: FILE: arch/powerpc/kernel/btext.c:167:
  +	if (vbase == NULL)

  CHECK: Comparison to NULL could be written "!base"
  #89: FILE: arch/powerpc/kernel/btext.c:274:
  +	if (base == NULL)

  CHECK: Comparison to NULL could be written "!dispDeviceBase"
  #98: FILE: arch/powerpc/kernel/btext.c:285:
  +	if (dispDeviceBase == NULL)

  CHECK: Comparison to NULL could be written "strstr"
  #117: FILE: arch/powerpc/kernel/module_32.c:117:
  +		if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != NULL)

  CHECK: Comparison to NULL could be written "!Hash"
  #130: FILE: arch/powerpc/mm/ppc_mmu_32.c:170:
  +	if (Hash == NULL)

  CHECK: Comparison to NULL could be written "Hash"
  #143: FILE: arch/powerpc/mm/tlb_hash32.c:44:
  +	if (Hash != NULL) {

  CHECK: Comparison to NULL could be written "!Hash"
  #152: FILE: arch/powerpc/mm/tlb_hash32.c:57:
  +	if (Hash == NULL) {

  CHECK: Comparison to NULL could be written "!Hash"
  #161: FILE: arch/powerpc/mm/tlb_hash32.c:87:
  +	if (Hash == NULL) {

  CHECK: Comparison to NULL could be written "!Hash"
  #170: FILE: arch/powerpc/mm/tlb_hash32.c:127:
  +	if (Hash == NULL) {

  CHECK: Comparison to NULL could be written "!Hash"
  #179: FILE: arch/powerpc/mm/tlb_hash32.c:148:
  +	if (Hash == NULL) {

  ERROR: space required after that ';' (ctx:VxV)
  #192: FILE: arch/powerpc/platforms/powermac/pci.c:65:
  +	for (; node != NULL;node = node->sibling) {

  CHECK: Comparison to NULL could be written "node"
  #192: FILE: arch/powerpc/platforms/powermac/pci.c:65:
  +	for (; node != NULL;node = node->sibling) {

  CHECK: Comparison to NULL could be written "!region"
  #201: FILE: arch/powerpc/platforms/powermac/pci.c:1227:
  +	if (region == NULL)

  CHECK: Comparison to NULL could be written "of_get_property"
  #214: FILE: arch/powerpc/platforms/powermac/setup.c:155:
  +		if (of_get_property(np, "cache-unified", NULL) != NULL && dc) {

  CHECK: Comparison to NULL could be written "!np"
  #223: FILE: arch/powerpc/platforms/powermac/setup.c:247:
  +		if (np == NULL)

  CHECK: Comparison to NULL could be written "np"
  #226: FILE: arch/powerpc/platforms/powermac/setup.c:249:
  +		if (np != NULL) {

  CHECK: Comparison to NULL could be written "l2cr"
  #230: FILE: arch/powerpc/platforms/powermac/setup.c:252:
  +			if (l2cr != NULL) {

  CHECK: Comparison to NULL could be written "via"
  #243: FILE: drivers/macintosh/via-pmu.c:277:
  +	if (via != NULL)

  CHECK: Comparison to NULL could be written "current_req"
  #252: FILE: drivers/macintosh/via-pmu.c:1155:
  +	if (current_req != NULL) {

  CHECK: Comparison to NULL could be written "!req"
  #261: FILE: drivers/macintosh/via-pmu.c:1230:
  +	if (req == NULL || pmu_state != idle

  CHECK: Comparison to NULL could be written "!req"
  #270: FILE: drivers/macintosh/via-pmu.c:1385:
  +			if (req == NULL) {

  CHECK: Comparison to NULL could be written "!pp"
  #288: FILE: drivers/macintosh/via-pmu.c:2084:
  +	if (pp == NULL)

  CHECK: Comparison to NULL could be written "!pp"
  #297: FILE: drivers/macintosh/via-pmu.c:2110:
  +	if (count < 1 || pp == NULL)

  CHECK: Comparison to NULL could be written "!pp"
  #306: FILE: drivers/macintosh/via-pmu.c:2167:
  +	if (pp == NULL)

  CHECK: Comparison to NULL could be written "pp"
  #315: FILE: drivers/macintosh/via-pmu.c:2183:
  +	if (pp != NULL) {

Link: https://github.com/linuxppc/linux/issues/37
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/hugetlb.h      |  2 +-
 arch/powerpc/kernel/btext.c             |  8 ++++----
 arch/powerpc/kernel/module_32.c         |  4 ++--
 arch/powerpc/mm/ppc_mmu_32.c            |  2 +-
 arch/powerpc/mm/tlb_hash32.c            | 10 +++++-----
 arch/powerpc/platforms/powermac/pci.c   |  4 ++--
 arch/powerpc/platforms/powermac/setup.c |  8 ++++----
 drivers/macintosh/via-pmu.c             | 18 +++++++++---------
 8 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 78540c074d70b..96444bc08034c 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -202,7 +202,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
 				    unsigned pdshift)
 {
-	return 0;
+	return NULL;
 }
 #endif /* CONFIG_HUGETLB_PAGE */
 
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 6537cba1a7580..54403144623f3 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -157,14 +157,14 @@ void btext_map(void)
 
 	/* By default, we are no longer mapped */
 	boot_text_mapped = 0;
-	if (dispDeviceBase == 0)
+	if (!dispDeviceBase)
 		return;
 	base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL;
 	offset = ((unsigned long) dispDeviceBase) - base;
 	size = dispDeviceRowBytes * dispDeviceRect[3] + offset
 		+ dispDeviceRect[0];
 	vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
-	if (vbase == 0)
+	if (!vbase)
 		return;
 	logicalDisplayBase = vbase + offset;
 	boot_text_mapped = 1;
@@ -270,7 +270,7 @@ static unsigned char * calc_base(int x, int y)
 	unsigned char *base;
 
 	base = logicalDisplayBase;
-	if (base == 0)
+	if (!base)
 		base = dispDeviceBase;
 	base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
 	base += (y + dispDeviceRect[1]) * dispDeviceRowBytes;
@@ -281,7 +281,7 @@ static unsigned char * calc_base(int x, int y)
 void btext_update_display(unsigned long phys, int width, int height,
 			  int depth, int pitch)
 {
-	if (dispDeviceBase == 0)
+	if (!dispDeviceBase)
 		return;
 
 	/* check it's the same frame buffer (within 256MB) */
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 5a7a78f125629..88d83771f462c 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -109,12 +109,12 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
 	for (i = 1; i < hdr->e_shnum; i++) {
 		/* If it's called *.init*, and we're not init, we're
                    not interested */
-		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
+		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL)
 		    != is_init)
 			continue;
 
 		/* We don't want to look at debug sections. */
-		if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0)
+		if (strstr(secstrings + sechdrs[i].sh_name, ".debug"))
 			continue;
 
 		if (sechdrs[i].sh_type == SHT_RELA) {
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 2a049fb8523d5..bea6c544e38f9 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -167,7 +167,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 {
 	pmd_t *pmd;
 
-	if (Hash == 0)
+	if (!Hash)
 		return;
 	pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
 	if (!pmd_none(*pmd))
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 702d7689d714e..cf8472cf3d593 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -41,7 +41,7 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
 {
 	unsigned long ptephys;
 
-	if (Hash != 0) {
+	if (Hash) {
 		ptephys = __pa(ptep) & PAGE_MASK;
 		flush_hash_pages(mm->context.id, addr, ptephys, 1);
 	}
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(flush_hash_entry);
  */
 void tlb_flush(struct mmu_gather *tlb)
 {
-	if (Hash == 0) {
+	if (!Hash) {
 		/*
 		 * 603 needs to flush the whole TLB here since
 		 * it doesn't use a hash table.
@@ -84,7 +84,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
 	int count;
 	unsigned int ctx = mm->context.id;
 
-	if (Hash == 0) {
+	if (!Hash) {
 		_tlbia();
 		return;
 	}
@@ -124,7 +124,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *mp;
 
-	if (Hash == 0) {
+	if (!Hash) {
 		_tlbia();
 		return;
 	}
@@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 	struct mm_struct *mm;
 	pmd_t *pmd;
 
-	if (Hash == 0) {
+	if (!Hash) {
 		_tlbie(vmaddr);
 		return;
 	}
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 0b8174a799937..df762bb3c7356 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -62,7 +62,7 @@ struct device_node *k2_skiplist[2];
 
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
-	for (; node != 0;node = node->sibling) {
+	for (; node; node = node->sibling) {
 		const int * bus_range;
 		const unsigned int *class_code;
 		int len;
@@ -1219,7 +1219,7 @@ static void fixup_u4_pcie(struct pci_dev* dev)
 			region = r;
 	}
 	/* Nothing found, bail */
-	if (region == 0)
+	if (!region)
 		return;
 
 	/* Print things out */
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index ab668cb72263c..f40e87ca180b5 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -152,7 +152,7 @@ static void pmac_show_cpuinfo(struct seq_file *m)
 			of_get_property(np, "d-cache-size", NULL);
 		seq_printf(m, "L2 cache\t:");
 		has_l2cache = 1;
-		if (of_get_property(np, "cache-unified", NULL) != 0 && dc) {
+		if (of_get_property(np, "cache-unified", NULL) && dc) {
 			seq_printf(m, " %dK unified", *dc / 1024);
 		} else {
 			if (ic)
@@ -244,12 +244,12 @@ static void __init l2cr_init(void)
 	/* Checks "l2cr-value" property in the registry */
 	if (cpu_has_feature(CPU_FTR_L2CR)) {
 		struct device_node *np = of_find_node_by_name(NULL, "cpus");
-		if (np == 0)
+		if (!np)
 			np = of_find_node_by_type(NULL, "cpu");
-		if (np != 0) {
+		if (np) {
 			const unsigned int *l2cr =
 				of_get_property(np, "l2cr-value", NULL);
-			if (l2cr != 0) {
+			if (l2cr) {
 				ppc_override_l2cr = 1;
 				ppc_override_l2cr_value = *l2cr;
 				_set_L2CR(0);
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 433dbeddfcf9e..a3b6a4a703abe 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -274,7 +274,7 @@ int __init find_via_pmu(void)
 	u64 taddr;
 	const u32 *reg;
 
-	if (via != 0)
+	if (via)
 		return 1;
 	vias = of_find_node_by_name(NULL, "via-pmu");
 	if (vias == NULL)
@@ -1152,7 +1152,7 @@ pmu_queue_request(struct adb_request *req)
 	req->complete = 0;
 
 	spin_lock_irqsave(&pmu_lock, flags);
-	if (current_req != 0) {
+	if (current_req) {
 		last_req->next = req;
 		last_req = req;
 	} else {
@@ -1227,7 +1227,7 @@ pmu_start(void)
 	/* assert pmu_state == idle */
 	/* get the packet to send */
 	req = current_req;
-	if (req == 0 || pmu_state != idle
+	if (!req || pmu_state != idle
 	    || (/*req->reply_expected && */req_awaiting_reply))
 		return;
 
@@ -1382,7 +1382,7 @@ pmu_handle_data(unsigned char *data, int len)
 	if ((1 << pirq) & PMU_INT_ADB) {
 		if ((data[0] & PMU_INT_ADB_AUTO) == 0) {
 			struct adb_request *req = req_awaiting_reply;
-			if (req == 0) {
+			if (!req) {
 				printk(KERN_ERR "PMU: extra ADB reply\n");
 				return;
 			}
@@ -1749,7 +1749,7 @@ pmu_shutdown(void)
 int
 pmu_present(void)
 {
-	return via != 0;
+	return via != NULL;
 }
 
 #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
@@ -2081,7 +2081,7 @@ pmu_open(struct inode *inode, struct file *file)
 	unsigned long flags;
 
 	pp = kmalloc(sizeof(struct pmu_private), GFP_KERNEL);
-	if (pp == 0)
+	if (!pp)
 		return -ENOMEM;
 	pp->rb_get = pp->rb_put = 0;
 	spin_lock_init(&pp->lock);
@@ -2107,7 +2107,7 @@ pmu_read(struct file *file, char __user *buf,
 	unsigned long flags;
 	int ret = 0;
 
-	if (count < 1 || pp == 0)
+	if (count < 1 || !pp)
 		return -EINVAL;
 	if (!access_ok(VERIFY_WRITE, buf, count))
 		return -EFAULT;
@@ -2164,7 +2164,7 @@ pmu_fpoll(struct file *filp, poll_table *wait)
 	__poll_t mask = 0;
 	unsigned long flags;
 	
-	if (pp == 0)
+	if (!pp)
 		return 0;
 	poll_wait(filp, &pp->wait, wait);
 	spin_lock_irqsave(&pp->lock, flags);
@@ -2180,7 +2180,7 @@ pmu_release(struct inode *inode, struct file *file)
 	struct pmu_private *pp = file->private_data;
 	unsigned long flags;
 
-	if (pp != 0) {
+	if (pp) {
 		file->private_data = NULL;
 		spin_lock_irqsave(&all_pvt_lock, flags);
 		list_del(&pp->list);

From 85aa4b98419d74dd5cc914e089349800ac4997ee Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Mon, 23 Apr 2018 21:36:38 +0200
Subject: [PATCH 110/251] powerpc/mm/radix: Use do/while(0) trick for single
 statement block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 7a22d6321c3d ("powerpc/mm/radix: Update command line parsing for
disable_radix") an `if` statement was added for a possible empty body
(prom_debug).

Fix the following warning, treated as error with W=1:

  arch/powerpc/kernel/prom_init.c:656:46: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body]

Suggested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 67f9c157bcc08..425992e393bc9 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -103,7 +103,7 @@ int of_workarounds;
 #ifdef DEBUG_PROM
 #define prom_debug(x...)	prom_printf(x)
 #else
-#define prom_debug(x...)
+#define prom_debug(x...)	do { } while (0)
 #endif
 
 

From eed6964bce59294f293b1746a6e4a69fa709fa5b Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Mon, 23 Apr 2018 21:45:32 +0200
Subject: [PATCH 111/251] powerpc/wii: Make hlwd_pic_init function static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function hlwd_pic_init can be made static, so do it. Fix the following
warning treated as error (W=1):

../arch/powerpc/platforms/embedded6xx/hlwd-pic.c:158:20: error: no previous prototype for ‘hlwd_pic_init’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index bf4a125faec66..8112b39879d67 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -160,7 +160,7 @@ static void __hlwd_quiesce(void __iomem *io_base)
 	out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff);
 }
 
-struct irq_domain *hlwd_pic_init(struct device_node *np)
+static struct irq_domain *hlwd_pic_init(struct device_node *np)
 {
 	struct irq_domain *irq_domain;
 	struct resource res;

From f0e0b86638e9df6731b1d61c71ae180224664107 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:09:11 +0200
Subject: [PATCH 112/251] powerpc/chrp/setup: Remove idu_size variable and make
 some functions static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove variable declaration idu_size and associated code since not used.

These functions can all be static, make it so. Fix warnings treated as
errors with W=1:

  arch/powerpc/platforms/chrp/setup.c:97:6: error: no previous prototype for ‘chrp_show_cpuinfo’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/setup.c:302:13: error: no previous prototype for ‘chrp_setup_arch’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/setup.c:385:16: error: variable ‘idu_size’ set but not used [-Werror=unused-but-set-variable]
  arch/powerpc/platforms/chrp/setup.c:526:13: error: no previous prototype for ‘chrp_init_IRQ’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/setup.c:559:1: error: no previous prototype for ‘chrp_init2’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/chrp/setup.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 481ed133e04b9..d6d8ffc0271ee 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -94,7 +94,7 @@ static const char *chrp_names[] = {
 	"Total Impact Briq"
 };
 
-void chrp_show_cpuinfo(struct seq_file *m)
+static void chrp_show_cpuinfo(struct seq_file *m)
 {
 	int i, sdramen;
 	unsigned int t;
@@ -299,7 +299,7 @@ static __init void chrp_init(void)
 	of_node_put(node);
 }
 
-void __init chrp_setup_arch(void)
+static void __init chrp_setup_arch(void)
 {
 	struct device_node *root = of_find_node_by_path("/");
 	const char *machine = NULL;
@@ -382,7 +382,7 @@ static void __init chrp_find_openpic(void)
 {
 	struct device_node *np, *root;
 	int len, i, j;
-	int isu_size, idu_size;
+	int isu_size;
 	const unsigned int *iranges, *opprop = NULL;
 	int oplen = 0;
 	unsigned long opaddr;
@@ -427,11 +427,9 @@ static void __init chrp_find_openpic(void)
 	}
 
 	isu_size = 0;
-	idu_size = 0;
 	if (len > 0 && iranges[1] != 0) {
 		printk(KERN_INFO "OpenPIC irqs %d..%d in IDU\n",
 		       iranges[0], iranges[0] + iranges[1] - 1);
-		idu_size = iranges[1];
 	}
 	if (len > 1)
 		isu_size = iranges[3];
@@ -523,7 +521,7 @@ static void __init chrp_find_8259(void)
 	}
 }
 
-void __init chrp_init_IRQ(void)
+static void __init chrp_init_IRQ(void)
 {
 #if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON)
 	struct device_node *kbd;
@@ -555,7 +553,7 @@ void __init chrp_init_IRQ(void)
 #endif
 }
 
-void __init
+static void __init
 chrp_init2(void)
 {
 #ifdef CONFIG_NVRAM

From 5a4b475cf8511da721f20ba432c244061db7139f Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:07:46 +0200
Subject: [PATCH 113/251] powerpc/powermac: Mark variable x as unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the value of x is never intended to be read, declare it with gcc
attribute as unused. Fix warning treated as error with W=1:

  arch/powerpc/platforms/powermac/bootx_init.c:471:21: error: variable ‘x’ set but not used [-Werror=unused-but-set-variable]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powermac/bootx_init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index c3c9bbb3573ae..ba0964c176208 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -468,7 +468,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	boot_infos_t *bi = (boot_infos_t *) r4;
 	unsigned long hdr;
 	unsigned long space;
-	unsigned long ptr, x;
+	unsigned long ptr;
 	char *model;
 	unsigned long offset = reloc_offset();
 
@@ -562,6 +562,8 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	 * MMU switched OFF, so this should not be useful anymore.
 	 */
 	if (bi->version < 4) {
+		unsigned long x __maybe_unused;
+
 		bootx_printf("Touching pages...\n");
 
 		/*

From 910be6be6c00e6d33d47985586c392155ee7aa0e Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:15:03 +0200
Subject: [PATCH 114/251] powerpc/chrp/pci: Make some functions static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These functions can all be static, make it so. Fix warnings treated as
errors with W=1:

  arch/powerpc/platforms/chrp/pci.c:34:5: error: no previous prototype for ‘gg2_read_config’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/pci.c:61:5: error: no previous prototype for ‘gg2_write_config’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/pci.c:97:5: error: no previous prototype for ‘rtas_read_config’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/pci.c:112:5: error: no previous prototype for ‘rtas_write_config’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/chrp/pci.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 0f512d35f7c52..5ddb57b829214 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -31,7 +31,7 @@ void __iomem *gg2_pci_config_base;
  * limit the bus number to 3 bits
  */
 
-int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off,
+static int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off,
 			   int len, u32 *val)
 {
 	volatile void __iomem *cfg_data;
@@ -58,7 +58,7 @@ int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off,
 	return PCIBIOS_SUCCESSFUL;
 }
 
-int gg2_write_config(struct pci_bus *bus, unsigned int devfn, int off,
+static int gg2_write_config(struct pci_bus *bus, unsigned int devfn, int off,
 			    int len, u32 val)
 {
 	volatile void __iomem *cfg_data;
@@ -94,8 +94,8 @@ static struct pci_ops gg2_pci_ops =
 /*
  * Access functions for PCI config space using RTAS calls.
  */
-int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
-		     int len, u32 *val)
+static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			    int len, u32 *val)
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
@@ -109,8 +109,8 @@ int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
 	return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL;
 }
 
-int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
-		      int len, u32 val)
+static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			     int len, u32 val)
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)

From f91d5998996819eaf194ff48e00d4956a9270abd Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:13:55 +0200
Subject: [PATCH 115/251] powerpc/powermac: Move pmac_pfunc_base_install
 prototype to header file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pmac_pfunc_base_install prototype was declared in powermac/smp.c since
function was used there, move it to pmac_pfunc.h header to be visible in
pfunc_base.c. Fix a warning treated as error with W=1:

  arch/powerpc/platforms/powermac/pfunc_base.c:330:12: error: no previous prototype for ‘pmac_pfunc_base_install’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/pmac_pfunc.h | 1 +
 arch/powerpc/platforms/powermac/smp.c | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pmac_pfunc.h b/arch/powerpc/include/asm/pmac_pfunc.h
index 73bd8f28f2a8a..cee4e9f5b8cfa 100644
--- a/arch/powerpc/include/asm/pmac_pfunc.h
+++ b/arch/powerpc/include/asm/pmac_pfunc.h
@@ -245,6 +245,7 @@ extern void pmf_put_function(struct pmf_function *func);
 
 extern int pmf_call_one(struct pmf_function *func, struct pmf_args *args);
 
+int pmac_pfunc_base_install(void);
 
 /* Suspend/resume code called by via-pmu directly for now */
 extern void pmac_pfunc_base_suspend(void);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 95275e0e2efa5..447da6db450a3 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -65,7 +65,6 @@
 #endif
 
 extern void __secondary_start_pmac_0(void);
-extern int pmac_pfunc_base_install(void);
 
 static void (*pmac_tb_freeze)(int freeze);
 static u64 timebase;

From f72cf3f1d49f2c35d6cb682af2e8c93550f264e4 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:13:05 +0200
Subject: [PATCH 116/251] powerpc/powermac: Add missing prototype for
 note_bootable_part()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a missing prototype for function `note_bootable_part` to silence a
warning treated as error with W=1:

  arch/powerpc/platforms/powermac/setup.c:361:12: error: no previous prototype for ‘note_bootable_part’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powermac/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index f40e87ca180b5..3a529fcdae972 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -352,6 +352,7 @@ static int pmac_late_init(void)
 }
 machine_late_initcall(powermac, pmac_late_init);
 
+void note_bootable_part(dev_t dev, int part, int goodness);
 /*
  * This is __ref because we check for "initializing" before
  * touching any of the __init sensitive things and "initializing"

From c3f0515ea1277558e7c0c3240d17c77d2a3fcefe Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:12:30 +0200
Subject: [PATCH 117/251] powerpc/52xx: Add missing functions prototypes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function prototypes were declared within a `#ifdef CONFIG_PPC_LITE5200`
block which would prevent them from being visible when compiling
`mpc52xx_pm.c`. Move the prototypes outside of the `#ifdef` block to fix
the following warnings treated as errors with W=1:

  arch/powerpc/platforms/52xx/mpc52xx_pm.c:58:5: error: no previous prototype for ‘mpc52xx_pm_prepare’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/52xx/mpc52xx_pm.c:113:5: error: no previous prototype for ‘mpc52xx_pm_enter’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/52xx/mpc52xx_pm.c:181:6: error: no previous prototype for ‘mpc52xx_pm_finish’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mpc52xx.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h
index e94cede14522f..ce1e0aabaa64a 100644
--- a/arch/powerpc/include/asm/mpc52xx.h
+++ b/arch/powerpc/include/asm/mpc52xx.h
@@ -350,14 +350,14 @@ extern struct mpc52xx_suspend mpc52xx_suspend;
 extern int __init mpc52xx_pm_init(void);
 extern int mpc52xx_set_wakeup_gpio(u8 pin, u8 level);
 
-#ifdef CONFIG_PPC_LITE5200
-extern int __init lite5200_pm_init(void);
-
 /* lite5200 calls mpc5200 suspend functions, so here they are */
 extern int mpc52xx_pm_prepare(void);
 extern int mpc52xx_pm_enter(suspend_state_t);
 extern void mpc52xx_pm_finish(void);
 extern char saved_sram[0x4000]; /* reuse buffer from mpc52xx suspend */
+
+#ifdef CONFIG_PPC_LITE5200
+int __init lite5200_pm_init(void);
 #endif
 #endif /* CONFIG_PM */
 

From 3fc5ee9b2846db2eaddeadf089ef258167c4ee09 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:11:42 +0200
Subject: [PATCH 118/251] powerpc: Add missing prototype
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add one missing prototype for function rh_dump_blk. Fix warning treated as
error in W=1:

  arch/powerpc/lib/rheap.c:740:6: error: no previous prototype for ‘rh_dump_blk’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/rheap.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/rheap.h b/arch/powerpc/include/asm/rheap.h
index 172381769cfcf..8e83703d67362 100644
--- a/arch/powerpc/include/asm/rheap.h
+++ b/arch/powerpc/include/asm/rheap.h
@@ -83,6 +83,9 @@ extern int rh_get_stats(rh_info_t * info, int what, int max_stats,
 /* Simple dump of remote heap info */
 extern void rh_dump(rh_info_t * info);
 
+/* Simple dump of remote info block */
+void rh_dump_blk(rh_info_t *info, rh_block_t *blk);
+
 /* Set owner of taken block */
 extern int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner);
 

From bd13ac95f954570e01fba5a6caf771da754ac0e3 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:10:28 +0200
Subject: [PATCH 119/251] powerpc/tau: Synchronize function prototypes and body
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some function prototypes and body for Thermal Assist Units were not in
sync. Update the function definition to match the existing function
declaration found in `setup-common.c`, changing an `int` return type to a
`u32` return type. Move the prototypes to a header file. Fix the following
warnings, treated as error with W=1:

  arch/powerpc/kernel/tau_6xx.c:257:5: error: no previous prototype for ‘cpu_temp_both’ [-Werror=missing-prototypes]
  arch/powerpc/kernel/tau_6xx.c:262:5: error: no previous prototype for ‘cpu_temp’ [-Werror=missing-prototypes]
  arch/powerpc/kernel/tau_6xx.c:267:5: error: no previous prototype for ‘tau_interrupts’ [-Werror=missing-prototypes]

Compile tested with CONFIG_TAU_INT.

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/irq.c          | 2 +-
 arch/powerpc/kernel/setup-common.c | 6 ------
 arch/powerpc/kernel/setup.h        | 6 ++++++
 arch/powerpc/kernel/tau_6xx.c      | 7 +++++--
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 061aa0f47bb10..bbf7ec582d601 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -89,7 +89,7 @@ atomic_t ppc_n_lost_interrupts;
 
 #ifdef CONFIG_TAU_INT
 extern int tau_initialized;
-extern int tau_interrupts(int);
+u32 tau_interrupts(unsigned long cpu);
 #endif
 #endif /* CONFIG_PPC32 */
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 0af5c11b9e784..62b1a40d89577 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -192,12 +192,6 @@ void machine_halt(void)
 	machine_hang();
 }
 
-
-#ifdef CONFIG_TAU
-extern u32 cpu_temp(unsigned long cpu);
-extern u32 cpu_temp_both(unsigned long cpu);
-#endif /* CONFIG_TAU */
-
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(unsigned int, cpu_pvr);
 #endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index d144df54ad408..c6a592b673866 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -62,4 +62,10 @@ void kvm_cma_reserve(void);
 static inline void kvm_cma_reserve(void) { };
 #endif
 
+#ifdef CONFIG_TAU
+u32 cpu_temp(unsigned long cpu);
+u32 cpu_temp_both(unsigned long cpu);
+u32 tau_interrupts(unsigned long cpu);
+#endif /* CONFIG_TAU */
+
 #endif /* __ARCH_POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 8cdd852aedd1e..8f02353c049eb 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -27,6 +27,9 @@
 #include <asm/cache.h>
 #include <asm/8xx_immap.h>
 #include <asm/machdep.h>
+#include <asm/asm-prototypes.h>
+
+#include "setup.h"
 
 static struct tau_temp
 {
@@ -259,12 +262,12 @@ u32 cpu_temp_both(unsigned long cpu)
 	return ((tau[cpu].high << 16) | tau[cpu].low);
 }
 
-int cpu_temp(unsigned long cpu)
+u32 cpu_temp(unsigned long cpu)
 {
 	return ((tau[cpu].high + tau[cpu].low) / 2);
 }
 
-int tau_interrupts(unsigned long cpu)
+u32 tau_interrupts(unsigned long cpu)
 {
 	return (tau[cpu].interrupts);
 }

From 86e11b6e9c56e605475462bf9ba7c12dbe1e3e29 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:19:52 +0100
Subject: [PATCH 120/251] powerpc: Make function btext_initialize static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function can be static, make it so, this fix a warning treated as
error with W=1:

  arch/powerpc/kernel/btext.c:173:5: error: no previous prototype for ‘btext_initialize’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/btext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 54403144623f3..b2072d5bbf2bb 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -170,7 +170,7 @@ void btext_map(void)
 	boot_text_mapped = 1;
 }
 
-int btext_initialize(struct device_node *np)
+static int btext_initialize(struct device_node *np)
 {
 	unsigned int width, height, depth, pitch;
 	unsigned long address = 0;

From 9e0d86cd2d749998c3792059221cefa210b0177b Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:19:54 +0100
Subject: [PATCH 121/251] powerpc/tau: Make some function static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These functions can all be static, make it so. Fix warnings treated as
errors with W=1:

  arch/powerpc/kernel/tau_6xx.c:53:6: error: no previous prototype for ‘set_thresholds’ [-Werror=missing-prototypes]
  arch/powerpc/kernel/tau_6xx.c:73:6: error: no previous prototype for ‘TAUupdate’ [-Werror=missing-prototypes]
  arch/powerpc/kernel/tau_6xx.c:208:13: error: no previous prototype for ‘TAU_init_smp’ [-Werror=missing-prototypes]
  arch/powerpc/kernel/tau_6xx.c:220:12: error: no previous prototype for ‘TAU_init’ [-Werror=missing-prototypes]
  arch/powerpc/kernel/tau_6xx.c:126:6: error: no previous prototype for ‘TAUException’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/tau_6xx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 8f02353c049eb..e2ab8a111b693 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -53,7 +53,7 @@ struct timer_list tau_timer;
 #define shrink_timer	2*HZ	/* period between shrinking the window */
 #define min_window	2	/* minimum window size, degrees C */
 
-void set_thresholds(unsigned long cpu)
+static void set_thresholds(unsigned long cpu)
 {
 #ifdef CONFIG_TAU_INT
 	/*
@@ -73,7 +73,7 @@ void set_thresholds(unsigned long cpu)
 #endif
 }
 
-void TAUupdate(int cpu)
+static void TAUupdate(int cpu)
 {
 	unsigned thrm;
 
@@ -208,7 +208,7 @@ static void tau_timeout_smp(struct timer_list *unused)
 
 int tau_initialized = 0;
 
-void __init TAU_init_smp(void * info)
+static void __init TAU_init_smp(void *info)
 {
 	unsigned long cpu = smp_processor_id();
 
@@ -220,7 +220,7 @@ void __init TAU_init_smp(void * info)
 	set_thresholds(cpu);
 }
 
-int __init TAU_init(void)
+static int __init TAU_init(void)
 {
 	/* We assume in SMP that if one CPU has TAU support, they
 	 * all have it --BenH

From b87a358b4a1421abd544c0b554b1b7159b2b36c0 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:19:56 +0100
Subject: [PATCH 122/251] powerpc/chrp/time: Make some functions static, add
 missing header include
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a missing include <platforms/chrp/chrp.h>.

These functions can all be static, make it so. Fix warnings treated as
errors with W=1:

  arch/powerpc/platforms/chrp/time.c:41:13: error: no previous prototype for ‘chrp_time_init’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:66:5: error: no previous prototype for ‘chrp_cmos_clock_read’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:74:6: error: no previous prototype for ‘chrp_cmos_clock_write’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:86:5: error: no previous prototype for ‘chrp_set_rtc_time’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:130:6: error: no previous prototype for ‘chrp_get_rtc_time’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/chrp/time.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
index 03d115aaa1916..acde7bbe07164 100644
--- a/arch/powerpc/platforms/chrp/time.c
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -28,6 +28,8 @@
 #include <asm/sections.h>
 #include <asm/time.h>
 
+#include <platforms/chrp/chrp.h>
+
 extern spinlock_t rtc_lock;
 
 #define NVRAM_AS0  0x74
@@ -63,7 +65,7 @@ long __init chrp_time_init(void)
 	return 0;
 }
 
-int chrp_cmos_clock_read(int addr)
+static int chrp_cmos_clock_read(int addr)
 {
 	if (nvram_as1 != 0)
 		outb(addr>>8, nvram_as1);
@@ -71,7 +73,7 @@ int chrp_cmos_clock_read(int addr)
 	return (inb(nvram_data));
 }
 
-void chrp_cmos_clock_write(unsigned long val, int addr)
+static void chrp_cmos_clock_write(unsigned long val, int addr)
 {
 	if (nvram_as1 != 0)
 		outb(addr>>8, nvram_as1);

From c89ca593220931c150cffda24b4d4ccf82f13fc8 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:20:03 +0100
Subject: [PATCH 123/251] powerpc/32: Add a missing include header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The header file <linux/syscalls.h> was missing from the includes. Fix the
following warning, treated as error with W=1:

  arch/powerpc/kernel/pci_32.c:286:6: error: no previous prototype for ‘sys_pciconfig_iobase’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/pci_32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index af36e46c3ed63..d63b488d34d79 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/bootmem.h>
+#include <linux/syscalls.h>
 #include <linux/irq.h>
 #include <linux/list.h>
 #include <linux/of.h>

From d647b210ac738b401c7f824bbebdcbcedbe7cb6b Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:20:04 +0100
Subject: [PATCH 124/251] powerpc: Add a missing include header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The header file <asm/switch_to.h> was missing from the includes. Fix the
following warning, treated as error with W=1:

  arch/powerpc/kernel/vecemu.c:260:5: error: no previous prototype for ‘emulate_altivec’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/vecemu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index 8812085883fdb..4acd3fb2b38ee 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
+#include <asm/switch_to.h>
 #include <linux/uaccess.h>
 
 /* Functions in vector.S */

From 458c70173daaaa823faeb5b4031bf8fa34c7ca16 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Tue, 8 May 2018 10:30:24 +0530
Subject: [PATCH 125/251] powerpc/perf: Update raw-event code encoding comment
 for power8

Comment explanning the raw event code encoding for Power8 was
moved to isa207_common.h file when re-factoring the code to
support power9. But then Power9 pmu branched out due to changes
specific to power9. So move the encoding comment back to power8-pmu.c
Just comment movement and no logic change.

Fixes: 4d3576b20716 ('powerpc/perf: factor out power8 pmu macros and defines')
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/isa207-common.h | 64 -------------------------------
 arch/powerpc/perf/power8-pmu.c    | 64 +++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 6c737d675792e..6a0b586c935ab 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -17,70 +17,6 @@
 #include <asm/firmware.h>
 #include <asm/cputable.h>
 
-/*
- * Raw event encoding for PowerISA v2.07:
- *
- *        60        56        52        48        44        40        36        32
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
- *   | |  |                                                              |
- *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
- *   | *- BHRB (Linux)
- *   *- EBB (Linux)
- *
- *        28        24        20        16        12         8         4         0
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   c     m   [    pmcxsel    ]
- *     |        |           |                          |     |
- *     |        |           |                          |     *- mark
- *     |        |           *- L1/L2/L3 cache_sel      |
- *     |        |                                      |
- *     |        *- sampling mode for marked events     *- combine
- *     |
- *     *- thresh_sel
- *
- * Below uses IBM bit numbering.
- *
- * MMCR1[x:y] = unit    (PMCxUNIT)
- * MMCR1[x]   = combine (PMCxCOMB)
- *
- * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
- *	# PM_MRK_FAB_RSP_MATCH
- *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
- * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
- *	# PM_MRK_FAB_RSP_MATCH_CYC
- *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
- * else
- *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
- *
- * if thresh_sel:
- *	MMCRA[45:47] = thresh_sel
- *
- * if thresh_cmp:
- *	MMCRA[22:24] = thresh_cmp[0:2]
- *	MMCRA[25:31] = thresh_cmp[3:9]
- *
- * if unit == 6 or unit == 7
- *	MMCRC[53:55] = cache_sel[1:3]      (L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- *	if cache_sel[0] == 0: # L3 bank
- *		MMCRC[47:49] = cache_sel[1:3]  (L3EVENT_SEL0)
- *	else if cache_sel[0] == 1:
- *		MMCRC[50:51] = cache_sel[2:3]  (L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- *	MMCR1[16] = cache_sel[2]
- *	MMCR1[17] = cache_sel[3]
- *
- * if mark:
- *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
- *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
- *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
- *
- * if EBB and BHRB:
- *	MMCRA[32:33] = IFM
- *
- */
-
 #define EVENT_EBB_MASK		1ull
 #define EVENT_EBB_SHIFT		PERF_EVENT_CONFIG_EBB_SHIFT
 #define EVENT_BHRB_MASK		1ull
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index c9356955cab4c..d12a2db263535 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -30,6 +30,70 @@ enum {
 #define	POWER8_MMCRA_IFM2		0x0000000080000000UL
 #define	POWER8_MMCRA_IFM3		0x00000000C0000000UL
 
+/*
+ * Raw event encoding for PowerISA v2.07 (Power8):
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
+ *   | |  |                                                              |
+ *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
+ *   | *- BHRB (Linux)
+ *   *- EBB (Linux)
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   c     m   [    pmcxsel    ]
+ *     |        |           |                          |     |
+ *     |        |           |                          |     *- mark
+ *     |        |           *- L1/L2/L3 cache_sel      |
+ *     |        |                                      |
+ *     |        *- sampling mode for marked events     *- combine
+ *     |
+ *     *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit    (PMCxUNIT)
+ * MMCR1[x]   = combine (PMCxCOMB)
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ *	# PM_MRK_FAB_RSP_MATCH
+ *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ *	# PM_MRK_FAB_RSP_MATCH_CYC
+ *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
+ * else
+ *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
+ *
+ * if thresh_sel:
+ *	MMCRA[45:47] = thresh_sel
+ *
+ * if thresh_cmp:
+ *	MMCRA[22:24] = thresh_cmp[0:2]
+ *	MMCRA[25:31] = thresh_cmp[3:9]
+ *
+ * if unit == 6 or unit == 7
+ *	MMCRC[53:55] = cache_sel[1:3]      (L2EVENT_SEL)
+ * else if unit == 8 or unit == 9:
+ *	if cache_sel[0] == 0: # L3 bank
+ *		MMCRC[47:49] = cache_sel[1:3]  (L3EVENT_SEL0)
+ *	else if cache_sel[0] == 1:
+ *		MMCRC[50:51] = cache_sel[2:3]  (L3EVENT_SEL1)
+ * else if cache_sel[1]: # L1 event
+ *	MMCR1[16] = cache_sel[2]
+ *	MMCR1[17] = cache_sel[3]
+ *
+ * if mark:
+ *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
+ *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
+ *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ *	MMCRA[32:33] = IFM
+ *
+ */
+
 /* PowerISA v2.07 format attribute structure*/
 extern struct attribute_group isa207_pmu_format_group;
 

From ac9816dcbab53c57bcf1d7b15370b08f1e284318 Mon Sep 17 00:00:00 2001
From: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Date: Wed, 16 May 2018 17:32:14 +0530
Subject: [PATCH 126/251] powerpc/powernv/cpuidle: Init all present cpus for
 deep states

Init all present cpus for deep states instead of "all possible" cpus.
Init fails if a possible cpu is guarded. Resulting in making only
non-deep states available for cpuidle/hotplug.

Stewart says, this means that for single threaded workloads, if you
guard out a CPU core you'll not get WoF (Workload Optimised
Frequency), which means that performance goes down when you wouldn't
expect it to.

Fixes: 77b54e9f213f ("powernv/powerpc: Add winkle support for offline cpus")
Cc: stable@vger.kernel.org # v3.19+
Signed-off-by: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/idle.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 1f12ab1e6030f..1c5d0675b43cb 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -79,7 +79,7 @@ static int pnv_save_sprs_for_deep_states(void)
 	uint64_t msr_val = MSR_IDLE;
 	uint64_t psscr_val = pnv_deepest_stop_psscr_val;
 
-	for_each_possible_cpu(cpu) {
+	for_each_present_cpu(cpu) {
 		uint64_t pir = get_hard_smp_processor_id(cpu);
 		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
 
@@ -814,7 +814,7 @@ static int __init pnv_init_idle_states(void)
 		int cpu;
 
 		pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
-		for_each_possible_cpu(cpu) {
+		for_each_present_cpu(cpu) {
 			int base_cpu = cpu_first_thread_sibling(cpu);
 			int idx = cpu_thread_in_core(cpu);
 			int i;

From 8ce621e1d946b1d1d7717337ab8dc3cbc4fd996f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 24 May 2018 22:48:34 -0500
Subject: [PATCH 127/251] powerpc/modules: remove unused mod_arch_specific.toc
 field

The toc field in the mod_arch_specific struct isn't actually used
anywhere, so remove it.

Also the ftrace-specific fields are now common between 32-bit and
64-bit, so simplify the struct definition a bit by moving them out of
the __powerpc64__ #ifdef.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/module.h | 13 +++++--------
 arch/powerpc/kernel/module_64.c   |  1 -
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 18f7214d68b78..d8374f984f399 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -50,13 +50,6 @@ struct mod_arch_specific {
 	unsigned int stubs_section;	/* Index of stubs section in module */
 	unsigned int toc_section;	/* What section is the TOC? */
 	bool toc_fixed;			/* Have we fixed up .TOC.? */
-#ifdef CONFIG_DYNAMIC_FTRACE
-	unsigned long toc;
-	unsigned long tramp;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-	unsigned long tramp_regs;
-#endif
-#endif
 
 	/* For module function descriptor dereference */
 	unsigned long start_opd;
@@ -65,10 +58,14 @@ struct mod_arch_specific {
 	/* Indices of PLT sections within module. */
 	unsigned int core_plt_section;
 	unsigned int init_plt_section;
+#endif /* powerpc64 */
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	unsigned long tramp;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	unsigned long tramp_regs;
+#endif
 #endif
-#endif /* powerpc64 */
 
 	/* List of BUG addresses, source line numbers and filenames */
 	struct list_head bug_list;
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index f7667e2ebfcb8..1b7419579820f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -823,7 +823,6 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
 
 int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
 {
-	mod->arch.toc = my_r2(sechdrs, mod);
 	mod->arch.tramp = create_ftrace_stub(sechdrs, mod,
 					(unsigned long)ftrace_caller);
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS

From 1f7256e7dddef49acf9f6c9fe3f93522a8a2a4c2 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Thu, 24 May 2018 23:11:44 -0300
Subject: [PATCH 128/251] selftests/powerpc: Add ptrace tests for Protection
 Key registers

This test exercises read and write access to the AMR, IAMR and UAMOR.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
[mpe: Simplify make rule]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/include/reg.h |   1 +
 .../testing/selftests/powerpc/ptrace/Makefile |   5 +-
 .../testing/selftests/powerpc/ptrace/child.h  | 139 ++++++++
 .../selftests/powerpc/ptrace/ptrace-pkey.c    | 327 ++++++++++++++++++
 .../testing/selftests/powerpc/ptrace/ptrace.h |  38 ++
 5 files changed, 509 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/ptrace/child.h
 create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c

diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 4afdebcce4cdb..7f348c059bc25 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -54,6 +54,7 @@
 #define SPRN_DSCR_PRIV 0x11	/* Privilege State DSCR */
 #define SPRN_DSCR      0x03	/* Data Stream Control Register */
 #define SPRN_PPR       896	/* Program Priority Register */
+#define SPRN_AMR       13	/* Authority Mask Register - problem state */
 
 /* TEXASR register bits */
 #define TEXASR_FC	0xFE00000000000000
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 0e2f4601d1a8e..bd8959854aec3 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
-              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak
+              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey
 
 include ../../lib.mk
 
@@ -9,6 +9,9 @@ all: $(TEST_PROGS)
 
 CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
+ptrace-pkey: child.h
+ptrace-pkey: LDLIBS += -pthread
+
 $(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
 
 clean:
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 0000000000000..d7275b7b33dc9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Helper functions to sync execution between parent and child processes.
+ *
+ * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <semaphore.h>
+
+/*
+ * Information in a shared memory location for synchronization between child and
+ * parent.
+ */
+struct child_sync {
+	/* The parent waits on this semaphore. */
+	sem_t sem_parent;
+
+	/* If true, the child should give up as well. */
+	bool parent_gave_up;
+
+	/* The child waits on this semaphore. */
+	sem_t sem_child;
+
+	/* If true, the parent should give up as well. */
+	bool child_gave_up;
+};
+
+#define CHILD_FAIL_IF(x, sync)						\
+	do {								\
+		if (x) {						\
+			fprintf(stderr,					\
+				"[FAIL] Test FAILED on line %d\n", __LINE__); \
+			(sync)->child_gave_up = true;			\
+			prod_parent(sync);				\
+			return 1;					\
+		}							\
+	} while (0)
+
+#define PARENT_FAIL_IF(x, sync)						\
+	do {								\
+		if (x) {						\
+			fprintf(stderr,					\
+				"[FAIL] Test FAILED on line %d\n", __LINE__); \
+			(sync)->parent_gave_up = true;			\
+			prod_child(sync);				\
+			return 1;					\
+		}							\
+	} while (0)
+
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync)				\
+	do {								\
+		if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
+			(sync)->parent_gave_up = true;			\
+			prod_child(sync);				\
+			SKIP_IF(1);					\
+		}							\
+	} while (0)
+
+int init_child_sync(struct child_sync *sync)
+{
+	int ret;
+
+	ret = sem_init(&sync->sem_parent, 1, 0);
+	if (ret) {
+		perror("Semaphore initialization failed");
+		return 1;
+	}
+
+	ret = sem_init(&sync->sem_child, 1, 0);
+	if (ret) {
+		perror("Semaphore initialization failed");
+		return 1;
+	}
+
+	return 0;
+}
+
+void destroy_child_sync(struct child_sync *sync)
+{
+	sem_destroy(&sync->sem_parent);
+	sem_destroy(&sync->sem_child);
+}
+
+int wait_child(struct child_sync *sync)
+{
+	int ret;
+
+	/* Wait until the child prods us. */
+	ret = sem_wait(&sync->sem_parent);
+	if (ret) {
+		perror("Error waiting for child");
+		return 1;
+	}
+
+	return sync->child_gave_up;
+}
+
+int prod_child(struct child_sync *sync)
+{
+	int ret;
+
+	/* Unblock the child now. */
+	ret = sem_post(&sync->sem_child);
+	if (ret) {
+		perror("Error prodding child");
+		return 1;
+	}
+
+	return 0;
+}
+
+int wait_parent(struct child_sync *sync)
+{
+	int ret;
+
+	/* Wait until the parent prods us. */
+	ret = sem_wait(&sync->sem_child);
+	if (ret) {
+		perror("Error waiting for parent");
+		return 1;
+	}
+
+	return sync->parent_gave_up;
+}
+
+int prod_parent(struct child_sync *sync)
+{
+	int ret;
+
+	/* Unblock the parent now. */
+	ret = sem_post(&sync->sem_parent);
+	if (ret) {
+		perror("Error prodding parent");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 0000000000000..5cf631f792cc7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc		384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free		385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY		0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+static const char user_read[] = "[User Read (Running)]";
+static const char user_write[] = "[User Write (Running)]";
+static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
+static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+	struct child_sync child_sync;
+
+	/* AMR value the parent expects to read from the child. */
+	unsigned long amr1;
+
+	/* AMR value the parent is expected to write to the child. */
+	unsigned long amr2;
+
+	/* AMR value that ptrace should refuse to write to the child. */
+	unsigned long amr3;
+
+	/* IAMR value the parent expects to read from the child. */
+	unsigned long expected_iamr;
+
+	/* UAMOR value the parent expects to read from the child. */
+	unsigned long expected_uamor;
+
+	/*
+	 * IAMR and UAMOR values that ptrace should refuse to write to the child
+	 * (even though they're valid ones) because userspace doesn't have
+	 * access to those registers.
+	 */
+	unsigned long new_iamr;
+	unsigned long new_uamor;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+	return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+static int child(struct shared_info *info)
+{
+	unsigned long reg;
+	bool disable_execute = true;
+	int pkey1, pkey2, pkey3;
+	int ret;
+
+	/* Wait until parent fills out the initial register values. */
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Get some pkeys so that we can change their bits in the AMR. */
+	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+	if (pkey1 < 0) {
+		pkey1 = sys_pkey_alloc(0, 0);
+		CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
+
+		disable_execute = false;
+	}
+
+	pkey2 = sys_pkey_alloc(0, 0);
+	CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
+
+	pkey3 = sys_pkey_alloc(0, 0);
+	CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
+
+	info->amr1 |= 3ul << pkeyshift(pkey1);
+	info->amr2 |= 3ul << pkeyshift(pkey2);
+	info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+
+	if (disable_execute)
+		info->expected_iamr |= 1ul << pkeyshift(pkey1);
+
+	info->expected_uamor |= 3ul << pkeyshift(pkey1) |
+				3ul << pkeyshift(pkey2);
+	info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
+	info->new_uamor |= 3ul << pkeyshift(pkey1);
+
+	/*
+	 * We won't use pkey3. We just want a plausible but invalid key to test
+	 * whether ptrace will let us write to AMR bits we are not supposed to.
+	 *
+	 * This also tests whether the kernel restores the UAMOR permissions
+	 * after a key is freed.
+	 */
+	sys_pkey_free(pkey3);
+
+	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+	       user_write, info->amr1, pkey1, pkey2, pkey3);
+
+	mtspr(SPRN_AMR, info->amr1);
+
+	/* Wait for parent to read our AMR value and write a new one. */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/*
+	 * Wait for parent to try to write an invalid AMR value.
+	 */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/*
+	 * Wait for parent to try to write an IAMR and a UAMOR value. We can't
+	 * verify them, but we can verify that the AMR didn't change.
+	 */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/* Now let parent now that we are finished. */
+
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+	unsigned long regs[3];
+	int ret, status;
+
+	/*
+	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
+	 * to the child.
+	 */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	info->amr1 = info->amr2 = info->amr3 = regs[0];
+	info->expected_iamr = info->new_iamr = regs[1];
+	info->expected_uamor = info->new_uamor = regs[2];
+
+	/* Wake up child so that it can set itself up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Verify that we can read the pkey registers from the child. */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
+	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+	/* Write valid AMR value in child. */
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);
+
+	/* Wake up child so that it can verify it changed. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Write invalid AMR value in child. */
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+
+	/* Wake up child so that it can verify it didn't change. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Try to write to IAMR. */
+	regs[0] = info->amr1;
+	regs[1] = info->new_iamr;
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
+	PARENT_FAIL_IF(!ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx\n",
+	       ptrace_write_running, regs[0], regs[1]);
+
+	/* Try to write to IAMR and UAMOR. */
+	regs[2] = info->new_uamor;
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(!ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_write_running, regs[0], regs[1], regs[2]);
+
+	/* Verify that all registers still have their expected values. */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
+	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+	/* Wake up child so that it can verify AMR didn't change and wrap up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait(&status);
+	if (ret != pid) {
+		printf("Child's exit status not captured\n");
+		ret = TEST_PASS;
+	} else if (!WIFEXITED(status)) {
+		printf("Child exited abnormally\n");
+		ret = TEST_FAIL;
+	} else
+		ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;
+
+	return ret;
+}
+
+static int ptrace_pkey(void)
+{
+	struct shared_info *info;
+	int shm_id;
+	int ret;
+	pid_t pid;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+	info = shmat(shm_id, NULL, 0);
+
+	ret = init_child_sync(&info->child_sync);
+	if (ret)
+		return ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		ret = TEST_FAIL;
+	} else if (pid == 0)
+		ret = child(info);
+	else
+		ret = parent(info, pid);
+
+	shmdt(info);
+
+	if (pid) {
+		destroy_child_sync(&info->child_sync);
+		shmctl(shm_id, IPC_RMID, NULL);
+	}
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_pkey, "ptrace_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
index 19fb825270a18..34201cfa83350 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -102,6 +102,44 @@ int cont_trace(pid_t child)
 	return TEST_PASS;
 }
 
+int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
+		     int n)
+{
+	struct iovec iov;
+	long ret;
+
+	FAIL_IF(start_trace(child));
+
+	iov.iov_base = regs;
+	iov.iov_len = n * sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+	if (ret)
+		return ret;
+
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
+		       int n)
+{
+	struct iovec iov;
+	long ret;
+
+	FAIL_IF(start_trace(child));
+
+	iov.iov_base = regs;
+	iov.iov_len = n * sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_SETREGSET, child, type, &iov);
+
+	FAIL_IF(stop_trace(child));
+
+	return ret;
+}
+
 /* TAR, PPR, DSCR */
 int show_tar_registers(pid_t child, unsigned long *out)
 {

From 39b91dd625f15438859204a489ab62ef41d2be39 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Thu, 24 May 2018 23:11:45 -0300
Subject: [PATCH 129/251] selftests/powerpc: Add core file test for Protection
 Key registers

This test verifies that the AMR, IAMR and UAMOR are being written to a
process' core file.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
[mpe: Simplify make rule]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../testing/selftests/powerpc/ptrace/Makefile |   6 +-
 .../selftests/powerpc/ptrace/core-pkey.c      | 461 ++++++++++++++++++
 2 files changed, 464 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/ptrace/core-pkey.c

diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index bd8959854aec3..4f59575389087 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
-              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey
+              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey
 
 include ../../lib.mk
 
@@ -9,8 +9,8 @@ all: $(TEST_PROGS)
 
 CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
-ptrace-pkey: child.h
-ptrace-pkey: LDLIBS += -pthread
+ptrace-pkey core-pkey: child.h
+ptrace-pkey core-pkey: LDLIBS += -pthread
 
 $(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
 
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 0000000000000..36bc312b1f5ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include <limits.h>
+#include <linux/kernel.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc		384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free		385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY		0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+#define CORE_FILE_LIMIT	(5 * 1024 * 1024)	/* 5 MB should be enough */
+
+static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
+
+static const char user_write[] = "[User Write (Running)]";
+static const char core_read_running[] = "[Core Read (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+	struct child_sync child_sync;
+
+	/* AMR value the parent expects to read in the core file. */
+	unsigned long amr;
+
+	/* IAMR value the parent expects to read in the core file. */
+	unsigned long iamr;
+
+	/* UAMOR value the parent expects to read in the core file. */
+	unsigned long uamor;
+
+	/* When the child crashed. */
+	time_t core_time;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+	return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+static int increase_core_file_limit(void)
+{
+	struct rlimit rlim;
+	int ret;
+
+	ret = getrlimit(RLIMIT_CORE, &rlim);
+	FAIL_IF(ret);
+
+	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+		rlim.rlim_cur = CORE_FILE_LIMIT;
+
+		if (rlim.rlim_max != RLIM_INFINITY &&
+		    rlim.rlim_max < CORE_FILE_LIMIT)
+			rlim.rlim_max = CORE_FILE_LIMIT;
+
+		ret = setrlimit(RLIMIT_CORE, &rlim);
+		FAIL_IF(ret);
+	}
+
+	ret = getrlimit(RLIMIT_FSIZE, &rlim);
+	FAIL_IF(ret);
+
+	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+		rlim.rlim_cur = CORE_FILE_LIMIT;
+
+		if (rlim.rlim_max != RLIM_INFINITY &&
+		    rlim.rlim_max < CORE_FILE_LIMIT)
+			rlim.rlim_max = CORE_FILE_LIMIT;
+
+		ret = setrlimit(RLIMIT_FSIZE, &rlim);
+		FAIL_IF(ret);
+	}
+
+	return TEST_PASS;
+}
+
+static int child(struct shared_info *info)
+{
+	bool disable_execute = true;
+	int pkey1, pkey2, pkey3;
+	int *ptr, ret;
+
+	/* Wait until parent fills out the initial register values. */
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	ret = increase_core_file_limit();
+	FAIL_IF(ret);
+
+	/* Get some pkeys so that we can change their bits in the AMR. */
+	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+	if (pkey1 < 0) {
+		pkey1 = sys_pkey_alloc(0, 0);
+		FAIL_IF(pkey1 < 0);
+
+		disable_execute = false;
+	}
+
+	pkey2 = sys_pkey_alloc(0, 0);
+	FAIL_IF(pkey2 < 0);
+
+	pkey3 = sys_pkey_alloc(0, 0);
+	FAIL_IF(pkey3 < 0);
+
+	info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
+
+	if (disable_execute)
+		info->iamr |= 1ul << pkeyshift(pkey1);
+
+	info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);
+
+	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+	       user_write, info->amr, pkey1, pkey2, pkey3);
+
+	mtspr(SPRN_AMR, info->amr);
+
+	/*
+	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
+	 * permissions after a key is freed.
+	 */
+	sys_pkey_free(pkey3);
+
+	info->core_time = time(NULL);
+
+	/* Crash. */
+	ptr = 0;
+	*ptr = 1;
+
+	/* Shouldn't get here. */
+	FAIL_IF(true);
+
+	return TEST_FAIL;
+}
+
+/* Return file size if filename exists and pass sanity check, or zero if not. */
+static off_t try_core_file(const char *filename, struct shared_info *info,
+			   pid_t pid)
+{
+	struct stat buf;
+	int ret;
+
+	ret = stat(filename, &buf);
+	if (ret == -1)
+		return TEST_FAIL;
+
+	/* Make sure we're not using a stale core file. */
+	return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
+}
+
+static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
+{
+	return (void *) nhdr + sizeof(*nhdr) +
+		__ALIGN_KERNEL(nhdr->n_namesz, 4) +
+		__ALIGN_KERNEL(nhdr->n_descsz, 4);
+}
+
+static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
+			   off_t core_size)
+{
+	unsigned long *regs;
+	Elf64_Phdr *phdr;
+	Elf64_Nhdr *nhdr;
+	size_t phdr_size;
+	void *p = ehdr, *note;
+	int ret;
+
+	ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
+	FAIL_IF(ret);
+
+	FAIL_IF(ehdr->e_type != ET_CORE);
+	FAIL_IF(ehdr->e_machine != EM_PPC64);
+	FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);
+
+	/*
+	 * e_phnum is at most 65535 so calculating the size of the
+	 * program header cannot overflow.
+	 */
+	phdr_size = sizeof(*phdr) * ehdr->e_phnum;
+
+	/* Sanity check the program header table location. */
+	FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
+	FAIL_IF(ehdr->e_phoff + phdr_size > core_size);
+
+	/* Find the PT_NOTE segment. */
+	for (phdr = p + ehdr->e_phoff;
+	     (void *) phdr < p + ehdr->e_phoff + phdr_size;
+	     phdr += ehdr->e_phentsize)
+		if (phdr->p_type == PT_NOTE)
+			break;
+
+	FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);
+
+	/* Find the NT_PPC_PKEY note. */
+	for (nhdr = p + phdr->p_offset;
+	     (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
+	     nhdr = next_note(nhdr))
+		if (nhdr->n_type == NT_PPC_PKEY)
+			break;
+
+	FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
+	FAIL_IF(nhdr->n_descsz == 0);
+
+	p = nhdr;
+	note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);
+
+	regs = (unsigned long *) note;
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       core_read_running, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(regs[0] != info->amr);
+	FAIL_IF(regs[1] != info->iamr);
+	FAIL_IF(regs[2] != info->uamor);
+
+	return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+	char *filenames, *filename[3];
+	int fd, i, ret, status;
+	unsigned long regs[3];
+	off_t core_size;
+	void *core;
+
+	/*
+	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
+	 * to the child.
+	 */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	info->amr = regs[0];
+	info->iamr = regs[1];
+	info->uamor = regs[2];
+
+	/* Wake up child so that it can set itself up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait(&status);
+	if (ret != pid) {
+		printf("Child's exit status not captured\n");
+		return TEST_FAIL;
+	} else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
+		printf("Child didn't dump core\n");
+		return TEST_FAIL;
+	}
+
+	/* Construct array of core file names to try. */
+
+	filename[0] = filenames = malloc(PATH_MAX);
+	if (!filenames) {
+		perror("Error allocating memory");
+		return TEST_FAIL;
+	}
+
+	ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
+	if (ret < 0 || ret >= PATH_MAX) {
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	filename[1] = filename[0] + ret + 1;
+	ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
+	if (ret < 0 || ret >= PATH_MAX - ret - 1) {
+		ret = TEST_FAIL;
+		goto out;
+	}
+	filename[2] = "core";
+
+	for (i = 0; i < 3; i++) {
+		core_size = try_core_file(filename[i], info, pid);
+		if (core_size != TEST_FAIL)
+			break;
+	}
+
+	if (i == 3) {
+		printf("Couldn't find core file\n");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	fd = open(filename[i], O_RDONLY);
+	if (fd == -1) {
+		perror("Error opening core file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (core == (void *) -1) {
+		perror("Error mmaping core file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	ret = check_core_file(info, core, core_size);
+
+	munmap(core, core_size);
+	close(fd);
+	unlink(filename[i]);
+
+ out:
+	free(filenames);
+
+	return ret;
+}
+
+static int write_core_pattern(const char *core_pattern)
+{
+	size_t len = strlen(core_pattern), ret;
+	FILE *f;
+
+	f = fopen(core_pattern_file, "w");
+	if (!f) {
+		perror("Error writing to core_pattern file");
+		return TEST_FAIL;
+	}
+
+	ret = fwrite(core_pattern, 1, len, f);
+	fclose(f);
+	if (ret != len) {
+		perror("Error writing to core_pattern file");
+		return TEST_FAIL;
+	}
+
+	return TEST_PASS;
+}
+
+static int setup_core_pattern(char **core_pattern_, bool *changed_)
+{
+	FILE *f;
+	char *core_pattern;
+	int ret;
+
+	core_pattern = malloc(PATH_MAX);
+	if (!core_pattern) {
+		perror("Error allocating memory");
+		return TEST_FAIL;
+	}
+
+	f = fopen(core_pattern_file, "r");
+	if (!f) {
+		perror("Error opening core_pattern file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	ret = fread(core_pattern, 1, PATH_MAX, f);
+	fclose(f);
+	if (!ret) {
+		perror("Error reading core_pattern file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	/* Check whether we can predict the name of the core file. */
+	if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
+		*changed_ = false;
+	else {
+		ret = write_core_pattern("core-pkey.%p");
+		if (ret)
+			goto out;
+
+		*changed_ = true;
+	}
+
+	*core_pattern_ = core_pattern;
+	ret = TEST_PASS;
+
+ out:
+	if (ret)
+		free(core_pattern);
+
+	return ret;
+}
+
+static int core_pkey(void)
+{
+	char *core_pattern;
+	bool changed_core_pattern;
+	struct shared_info *info;
+	int shm_id;
+	int ret;
+	pid_t pid;
+
+	ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
+	if (ret)
+		return ret;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+	info = shmat(shm_id, NULL, 0);
+
+	ret = init_child_sync(&info->child_sync);
+	if (ret)
+		return ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		ret = TEST_FAIL;
+	} else if (pid == 0)
+		ret = child(info);
+	else
+		ret = parent(info, pid);
+
+	shmdt(info);
+
+	if (pid) {
+		destroy_child_sync(&info->child_sync);
+		shmctl(shm_id, IPC_RMID, NULL);
+
+		if (changed_core_pattern)
+			write_core_pattern(core_pattern);
+	}
+
+	free(core_pattern);
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(core_pkey, "core_pkey");
+}

From 5e3f0d15ae5f95bdde8d092a0884d2defe27d448 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 22 May 2018 14:38:20 +0530
Subject: [PATCH 130/251] powerpc/livepatch: Fix build error with kprobes
 disabled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

arch/powerpc/kernel/stacktrace.c: In function ‘save_stack_trace_tsk_reliable’:
arch/powerpc/kernel/stacktrace.c:176:28: error: ‘kretprobe_trampoline’ undeclared
   if (ip == (unsigned long)kretprobe_trampoline)
                            ^~~~~~~~~~~~~~~~~~~~

Fixes: df78d3f61480 ("powerpc/livepatch: Implement reliable stack tracing for the consistency model")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/stacktrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 26a50603177c3..8dd6ba0c7d353 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -168,13 +168,14 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
 		 * arch-dependent code, they are generic.
 		 */
 		ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
-
+#ifdef CONFIG_KPROBES
 		/*
 		 * Mark stacktraces with kretprobed functions on them
 		 * as unreliable.
 		 */
 		if (ip == (unsigned long)kretprobe_trampoline)
 			return 1;
+#endif
 
 		if (!trace->skip)
 			trace->entries[trace->nr_entries++] = ip;

From 4bf4f42a2febb449a5cc5d79e7c58e5b168942d7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 22:19:19 +1000
Subject: [PATCH 131/251] powerpc/kbuild: Set default generic machine type for
 32-bit compile

Some 64-bit toolchains uses the wrong ISA variant for compiling 32-bit
kernels, even with -m32. Debian's powerpc64le is one such case, and
that is because it is built with --with-cpu=power8.

So when cross compiling a 32-bit kernel with a 64-bit toolchain, set
-mcpu=powerpc initially, which is the generic 32-bit powerpc machine
type and scheduling model. CPU and platform code can override this
with subsequent -mcpu flags if necessary.

This is not done for 32-bit toolchains otherwise it would override
their defaults, which are presumably set appropriately for the
environment (moreso than a 64-bit cross compiler).

This fixes a lot of build failures due to incompatible assembly when
compiling 32-bit kernel with the Debian powerpc64le 64-bit toolchain.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 95813df908012..d628724087c60 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -24,6 +24,14 @@ ifeq ($(HAS_BIARCH),y)
 ifeq ($(CROSS32_COMPILE),)
 CROSS32CC	:= $(CC) -m32
 KBUILD_ARFLAGS	+= --target=elf32-powerpc
+ifdef CONFIG_PPC32
+# These options will be overridden by any -mcpu option that the CPU
+# or platform code sets later on the command line, but they are needed
+# to set a sane 32-bit cpu target for the 64-bit cross compiler which
+# may default to the wrong ISA.
+KBUILD_CFLAGS		+= -mcpu=powerpc
+KBUILD_AFLAGS		+= -mcpu=powerpc
+endif
 endif
 endif
 

From af3901cbbd3de182aafb8ee553c825c0074df6a2 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 22:19:20 +1000
Subject: [PATCH 132/251] powerpc/kbuild: Remove CROSS32 defines from top level
 powerpc Makefile

Switch VDSO32 build over to use CROSS32_COMPILE directly, and have
it pass in -m32 after the standard c_flags. This allows endianness
overrides to be removed and the endian and bitness flags moved into
standard flags variables.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile               |  7 -------
 arch/powerpc/boot/Makefile          | 16 +++++++++++-----
 arch/powerpc/kernel/vdso32/Makefile | 15 +++++++++++----
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index d628724087c60..167b26a0780c1 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -17,13 +17,8 @@ HAS_BIARCH	:= $(call cc-option-yn, -m32)
 # Set default 32 bits cross compilers for vdso and boot wrapper
 CROSS32_COMPILE ?=
 
-CROSS32CC		:= $(CROSS32_COMPILE)gcc
-CROSS32AR		:= $(CROSS32_COMPILE)ar
-
 ifeq ($(HAS_BIARCH),y)
 ifeq ($(CROSS32_COMPILE),)
-CROSS32CC	:= $(CC) -m32
-KBUILD_ARFLAGS	+= --target=elf32-powerpc
 ifdef CONFIG_PPC32
 # These options will be overridden by any -mcpu option that the CPU
 # or platform code sets later on the command line, but they are needed
@@ -35,8 +30,6 @@ endif
 endif
 endif
 
-export CROSS32CC CROSS32AR
-
 ifeq ($(CROSS_COMPILE),)
 KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
 else
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 26d5d2a5b8e99..49767e06202cc 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -23,19 +23,23 @@ all: $(obj)/zImage
 compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP
 compress-$(CONFIG_KERNEL_XZ)   := CONFIG_KERNEL_XZ
 
+ifdef CROSS32_COMPILE
+    BOOTCC := $(CROSS32_COMPILE)gcc
+    BOOTAR := $(CROSS32_COMPILE)ar
+else
+    BOOTCC := $(CC)
+    BOOTAR := $(AR)
+endif
+
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
 		 -D$(compress-y)
 
-BOOTCC := $(CC)
 ifdef CONFIG_PPC64_BOOT_WRAPPER
 BOOTCFLAGS	+= -m64
 else
 BOOTCFLAGS	+= -m32
-ifdef CROSS32_COMPILE
-    BOOTCC := $(CROSS32_COMPILE)gcc
-endif
 endif
 
 BOOTCFLAGS	+= -isystem $(shell $(BOOTCC) -print-file-name=include)
@@ -49,6 +53,8 @@ endif
 
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
+BOOTARFLAGS	:= -cr$(KBUILD_ARFLAGS)
+
 ifdef CONFIG_DEBUG_INFO
 BOOTCFLAGS	+= -g
 endif
@@ -202,7 +208,7 @@ quiet_cmd_bootas = BOOTAS  $@
       cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR  $@
-      cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+      cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
 
 $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
 	$(call if_changed_dep,bootcc)
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index b8c434d1d4591..50112d4473bb9 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -8,8 +8,15 @@ obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
 
 # Build rules
 
-ifeq ($(CONFIG_PPC32),y)
-CROSS32CC := $(CC)
+ifdef CROSS32_COMPILE
+    VDSOCC := $(CROSS32_COMPILE)gcc
+else
+    VDSOCC := $(CC)
+endif
+
+CC32FLAGS :=
+ifdef CONFIG_PPC64
+CC32FLAGS += -m32
 endif
 
 targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
@@ -45,9 +52,9 @@ $(obj-vdso32): %.o: %.S FORCE
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
-      cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+      cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@

From 1421dc6d48296a9e91702743b31458d337610aa6 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 22:19:21 +1000
Subject: [PATCH 133/251] powerpc/kbuild: Use flags variables rather than
 overriding LD/CC/AS

The powerpc toolchain can compile combinations of 32/64 bit and
big/little endian, so it's convenient to consider, e.g.,

  `CC -m64 -mbig-endian`

To be the C compiler for the purpose of invoking it to build target
artifacts. So overriding the CC variable to include these flags works
for this purpose.

Unfortunately that is not compatible with the way the proposed new
Kconfig macro language will work.

After previous patches in this series, these flags can be carefully
passed in using flags instead.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile                          | 16 +++++++++-------
 .../powerpc/tools/gcc-check-mprofile-kernel.sh | 12 ++++++++----
 scripts/recordmcount.pl                        | 18 +++++++++++++++++-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 167b26a0780c1..6faf1d6ad9dd2 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -75,13 +75,15 @@ endif
 endif
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override LD	+= -EL
+KBUILD_CFLAGS	+= -mlittle-endian
+LDFLAGS		+= -EL
 LDEMULATION	:= lppc
 GNUTARGET	:= powerpcle
 MULTIPLEWORD	:= -mno-multiple
 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
 else
-override LD	+= -EB
+KBUILD_CFLAGS += $(call cc-option,-mbig-endian)
+LDFLAGS		+= -EB
 LDEMULATION	:= ppc
 GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
@@ -94,19 +96,19 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
 endif
 
-cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
-cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 ifneq ($(cc-name),clang)
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
 endif
 
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
 aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
 
 ifeq ($(HAS_BIARCH),y)
-override AS	+= -a$(BITS)
-override LD	+= -m elf$(BITS)$(LDEMULATION)
-override CC	+= -m$(BITS)
+KBUILD_CFLAGS	+= -m$(BITS)
+KBUILD_AFLAGS	+= -m$(BITS) -Wl,-a$(BITS)
+LDFLAGS		+= -m elf$(BITS)$(LDEMULATION)
 KBUILD_ARFLAGS	+= --target=elf$(BITS)-$(GNUTARGET)
 endif
 
diff --git a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
index 061f8035bdbea..a7dd0e5d9f984 100755
--- a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
+++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
@@ -7,17 +7,21 @@ set -o pipefail
 # To debug, uncomment the following line
 # set -x
 
+# -mprofile-kernel is only supported on 64le, so this should not be invoked
+# for other targets. Therefore we can pass in -m64 and -mlittle-endian
+# explicitly, to take care of toolchains defaulting to other targets.
+
 # Test whether the compile option -mprofile-kernel exists and generates
 # profiling code (ie. a call to _mcount()).
 echo "int func() { return 0; }" | \
-    $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
-    grep -q "_mcount"
+    $* -m64 -mlittle-endian -S -x c -O2 -p -mprofile-kernel - -o - \
+    2> /dev/null | grep -q "_mcount"
 
 # Test whether the notrace attribute correctly suppresses calls to _mcount().
 
 echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \
-    $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
-    grep -q "_mcount" && \
+    $* -m64 -mlittle-endian -S -x c -O2 -p -mprofile-kernel - -o - \
+    2> /dev/null | grep -q "_mcount" && \
     exit 1
 
 echo "OK"
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 191eb949d52c1..fe06e77c15eb7 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -266,13 +266,29 @@ sub check_objcopy
     $objcopy .= " -O elf32-sh-linux";
 
 } elsif ($arch eq "powerpc") {
+    my $ldemulation;
+
     $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
     # See comment in the sparc64 section for why we use '\w'.
     $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?\\w*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
 
+    if ($endian eq "big") {
+	    $cc .= " -mbig-endian ";
+	    $ld .= " -EB ";
+	    $ldemulation = "ppc"
+    } else {
+	    $cc .= " -mlittle-endian ";
+	    $ld .= " -EL ";
+	    $ldemulation = "lppc"
+    }
     if ($bits == 64) {
-	$type = ".quad";
+        $type = ".quad";
+        $cc .= " -m64 ";
+        $ld .= " -m elf64".$ldemulation." ";
+    } else {
+        $cc .= " -m32 ";
+        $ld .= " -m elf32".$ldemulation." ";
     }
 
 } elsif ($arch eq "arm") {

From b6c84ba22ff3a198eb8d5552cf9b8fda1d792e54 Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.ibm.com>
Date: Fri, 18 May 2018 15:12:23 +0530
Subject: [PATCH 134/251] cxl: Disable prefault_mode in Radix mode

Currently we see a kernel-oops reported on Power-9 while attaching a
context to an AFU, with radix-mode and sysfs attr 'prefault_mode' set
to anything other than 'none'. The backtrace of the oops is of this
form:

  Unable to handle kernel paging request for data at address 0x00000080
  Faulting instruction address: 0xc00800000bcf3b20
  cpu 0x1: Vector: 300 (Data Access) at [c00000037f003800]
      pc: c00800000bcf3b20: cxl_load_segment+0x178/0x290 [cxl]
      lr: c00800000bcf39f0: cxl_load_segment+0x48/0x290 [cxl]
      sp: c00000037f003a80
     msr: 9000000000009033
     dar: 80
   dsisr: 40000000
    current = 0xc00000037f280000
    paca    = 0xc0000003ffffe600   softe: 3        irq_happened: 0x01
      pid   = 3529, comm = afp_no_int
  <snip>
  cxl_prefault+0xfc/0x248 [cxl]
  process_element_entry_psl9+0xd8/0x1a0 [cxl]
  cxl_attach_dedicated_process_psl9+0x44/0x130 [cxl]
  native_attach_process+0xc0/0x130 [cxl]
  afu_ioctl+0x3f4/0x5e0 [cxl]
  do_vfs_ioctl+0xdc/0x890
  ksys_ioctl+0x68/0xf0
  sys_ioctl+0x40/0xa0
  system_call+0x58/0x6c

The issue is caused as on Power-8 the AFU attr 'prefault_mode' was
used to improve initial storage fault performance by prefaulting
process segments. However on Power-9 with radix mode we don't have
Storage-Segments that we can prefault. Also prefaulting process Pages
will be too costly and fine-grained.

Hence, since the prefaulting mechanism doesn't makes sense of
radix-mode, this patch updates prefault_mode_store() to not allow any
other value apart from CXL_PREFAULT_NONE when radix mode is enabled.

Fixes: f24be42aab37 ("cxl: Add psl9 specific code")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/ABI/testing/sysfs-class-cxl |  4 +++-
 drivers/misc/cxl/sysfs.c                  | 16 ++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
index 640f65e79ef1c..267920a1874b9 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -69,7 +69,9 @@ Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read/write
                 Set the mode for prefaulting in segments into the segment table
-                when performing the START_WORK ioctl. Possible values:
+                when performing the START_WORK ioctl. Only applicable when
+                running under hashed page table mmu.
+                Possible values:
                         none: No prefaulting (default)
                         work_element_descriptor: Treat the work element
                                  descriptor as an effective address and
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index 95285b7f636ff..90aa4eee8ccdf 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -343,12 +343,20 @@ static ssize_t prefault_mode_store(struct device *device,
 	struct cxl_afu *afu = to_cxl_afu(device);
 	enum prefault_modes mode = -1;
 
-	if (!strncmp(buf, "work_element_descriptor", 23))
-		mode = CXL_PREFAULT_WED;
-	if (!strncmp(buf, "all", 3))
-		mode = CXL_PREFAULT_ALL;
 	if (!strncmp(buf, "none", 4))
 		mode = CXL_PREFAULT_NONE;
+	else {
+		if (!radix_enabled()) {
+
+			/* only allowed when not in radix mode */
+			if (!strncmp(buf, "work_element_descriptor", 23))
+				mode = CXL_PREFAULT_WED;
+			if (!strncmp(buf, "all", 3))
+				mode = CXL_PREFAULT_ALL;
+		} else {
+			dev_err(device, "Cannot prefault with radix enabled\n");
+		}
+	}
 
 	if (mode == -1)
 		return -EINVAL;

From 9a6d2022bacd8fca0be6297459a02dfd28dad6ba Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Tue, 17 Apr 2018 10:41:02 +0530
Subject: [PATCH 135/251] cxl: Configure PSL to not use APC virtual machines

APC virtual machines arent used on POWER-9 chips and are already
disabled in on-chip CAPP. They also need to be disabled on the PSL via
'PSL Data Send Control Register' by setting bit(47). This forces the
PSL to send commands to CAPP with queue.id == 0.

Fixes: 5632874311db ("cxl: Add support for POWER9 DD2")
Cc: stable@vger.kernel.org # v4.15+
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reviewed-by: Alastair D'Silva <alastair@d-silva.org>
Reviewed-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 83f1d08058fc2..2e8b187956bca 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -514,9 +514,9 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
 	cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl);
 
 	/* Setup the PSL to transmit packets on the PCIe before the
-	 * CAPP is enabled
+	 * CAPP is enabled. Make sure that CAPP virtual machines are disabled
 	 */
-	cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000002A10ULL);
+	cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000012A10ULL);
 
 	/*
 	 * A response to an ASB_Notify request is returned by the

From 8af1da40669609707303eecdb857f48a5ba5792d Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Tue, 29 May 2018 21:20:01 +0200
Subject: [PATCH 136/251] powerpc/prom: Fix %u/%llx usage since prom_printf()
 change

In commit eae5f709a4d7 ("powerpc: Add __printf verification to
prom_printf") __printf attribute was added to prom_printf(), which
means GCC started warning about type/format mismatches. As part of
that commit we changed some "%lx" formats to "%llx" where the type is
actually unsigned long long.

Unfortunately prom_printf() doesn't know how to print "%llx", it just
prints a literal "lx", eg:

  reserved memory map:
    lx - lx
    lx - lx

prom_printf() also doesn't know how to print "%u" (only "%lu"), it
just prints a literal "u", eg:

  Max number of cores passed to firmware: u (NR_CPUS = 2048)

Instead of:

  Max number of cores passed to firmware: 2048 (NR_CPUS = 2048)

This commit adds support for the missing formatters.

Fixes: eae5f709a4d7 ("powerpc: Add __printf verification to prom_printf")
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c | 73 ++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 425992e393bc9..5425dd3d6a9f7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -301,6 +301,10 @@ static void __init prom_print(const char *msg)
 }
 
 
+/*
+ * Both prom_print_hex & prom_print_dec takes an unsigned long as input so that
+ * we do not need __udivdi3 or __umoddi3 on 32bits.
+ */
 static void __init prom_print_hex(unsigned long val)
 {
 	int i, nibbles = sizeof(val)*2;
@@ -341,6 +345,7 @@ static void __init prom_printf(const char *format, ...)
 	va_list args;
 	unsigned long v;
 	long vs;
+	int n = 0;
 
 	va_start(args, format);
 	for (p = format; *p != 0; p = q) {
@@ -359,6 +364,10 @@ static void __init prom_printf(const char *format, ...)
 		++q;
 		if (*q == 0)
 			break;
+		while (*q == 'l') {
+			++q;
+			++n;
+		}
 		switch (*q) {
 		case 's':
 			++q;
@@ -367,39 +376,55 @@ static void __init prom_printf(const char *format, ...)
 			break;
 		case 'x':
 			++q;
-			v = va_arg(args, unsigned long);
+			switch (n) {
+			case 0:
+				v = va_arg(args, unsigned int);
+				break;
+			case 1:
+				v = va_arg(args, unsigned long);
+				break;
+			case 2:
+			default:
+				v = va_arg(args, unsigned long long);
+				break;
+			}
 			prom_print_hex(v);
 			break;
-		case 'd':
+		case 'u':
 			++q;
-			vs = va_arg(args, int);
-			if (vs < 0) {
-				prom_print("-");
-				vs = -vs;
+			switch (n) {
+			case 0:
+				v = va_arg(args, unsigned int);
+				break;
+			case 1:
+				v = va_arg(args, unsigned long);
+				break;
+			case 2:
+			default:
+				v = va_arg(args, unsigned long long);
+				break;
 			}
-			prom_print_dec(vs);
+			prom_print_dec(v);
 			break;
-		case 'l':
+		case 'd':
 			++q;
-			if (*q == 0)
+			switch (n) {
+			case 0:
+				vs = va_arg(args, int);
 				break;
-			else if (*q == 'x') {
-				++q;
-				v = va_arg(args, unsigned long);
-				prom_print_hex(v);
-			} else if (*q == 'u') { /* '%lu' */
-				++q;
-				v = va_arg(args, unsigned long);
-				prom_print_dec(v);
-			} else if (*q == 'd') { /* %ld */
-				++q;
+			case 1:
 				vs = va_arg(args, long);
-				if (vs < 0) {
-					prom_print("-");
-					vs = -vs;
-				}
-				prom_print_dec(vs);
+				break;
+			case 2:
+			default:
+				vs = va_arg(args, long long);
+				break;
 			}
+			if (vs < 0) {
+				prom_print("-");
+				vs = -vs;
+			}
+			prom_print_dec(vs);
 			break;
 		}
 	}

From 667416f38554eef94485496f3a27b93feb5a9fbb Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Wed, 30 May 2018 18:02:25 +0530
Subject: [PATCH 137/251] powerpc/mm: Fix kernel crash on page table free

Fix the below crash on Book3E 64. pgtable_page_dtor expects struct
page *arg.

Also call the destructor on non book3s platforms correctly. This frees
up the split PTL locks correctly if we had allocated them before.

Call Trace:
  .kmem_cache_free+0x9c/0x44c (unreliable)
  .ptlock_free+0x1c/0x30
  .tlb_remove_table+0xdc/0x224
  .free_pgd_range+0x298/0x500
  .shift_arg_pages+0x10c/0x1e0
  .setup_arg_pages+0x200/0x25c
  .load_elf_binary+0x450/0x16c8
  .search_binary_handler.part.11+0x9c/0x248
  .do_execveat_common.isra.13+0x868/0xc18
  .run_init_process+0x34/0x4c
  .try_to_run_init_process+0x1c/0x68
  .kernel_init+0xdc/0x130
  .ret_from_kernel_thread+0x58/0x7c

Fixes: 702346768 ("powerpc/mm/nohash: Remove pte fragment dependency from nohash")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h | 1 +
 arch/powerpc/include/asm/nohash/32/pgalloc.h | 1 +
 arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 5073cc75f1c87..6a6673907e45e 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -99,6 +99,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 static inline void pgtable_free(void *table, unsigned index_size)
 {
 	if (!index_size) {
+		pgtable_page_dtor(virt_to_page(table));
 		free_page((unsigned long)table);
 	} else {
 		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 29d37bd1f3b37..1707781d2f208 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -100,6 +100,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 static inline void pgtable_free(void *table, unsigned index_size)
 {
 	if (!index_size) {
+		pgtable_page_dtor(virt_to_page(table));
 		free_page((unsigned long)table);
 	} else {
 		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 21624ff1f065b..0e693f322cb2e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -133,7 +133,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 static inline void pgtable_free(void *table, int shift)
 {
 	if (!shift) {
-		pgtable_page_dtor(table);
+		pgtable_page_dtor(virt_to_page(table));
 		free_page((unsigned long)table);
 	} else {
 		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);

From c95998811807d897ca112ea62d66716ed733d058 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 29 May 2018 06:03:53 +0000
Subject: [PATCH 138/251] powerpc/64: Fix strncpy() related build failures with
 GCC 8.1

GCC 8.1 warns about possible string truncation:

  arch/powerpc/kernel/nvram_64.c:1042:2: error: 'strncpy' specified
  bound 12 equals destination size [-Werror=stringop-truncation]
    strncpy(new_part->header.name, name, 12);

  arch/powerpc/platforms/ps3/repository.c:106:2: error: 'strncpy'
  output truncated before terminating nul copying 8 bytes from a
  string of the same length [-Werror=stringop-truncation]
    strncpy((char *)&n, text, 8);

Fix it by using memcpy(). To make that safe we need to ensure the
destination is pre-zeroed. Use kzalloc() in the nvram code and
initialise the u64 to zero in the ps3 code.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Use kzalloc() in the nvram code, flesh out change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/nvram_64.c          | 4 ++--
 arch/powerpc/platforms/ps3/repository.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index ba681dac7b467..22e9d281324da 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -1030,7 +1030,7 @@ loff_t __init nvram_create_partition(const char *name, int sig,
 		return -ENOSPC;
 	
 	/* Create our OS partition */
-	new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
+	new_part = kzalloc(sizeof(*new_part), GFP_KERNEL);
 	if (!new_part) {
 		pr_err("%s: kmalloc failed\n", __func__);
 		return -ENOMEM;
@@ -1039,7 +1039,7 @@ loff_t __init nvram_create_partition(const char *name, int sig,
 	new_part->index = free_part->index;
 	new_part->header.signature = sig;
 	new_part->header.length = size;
-	strncpy(new_part->header.name, name, 12);
+	memcpy(new_part->header.name, name, strnlen(name, sizeof(new_part->header.name)));
 	new_part->header.checksum = nvram_checksum(&new_part->header);
 
 	rc = nvram_write_header(new_part);
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 50dbaf24b1ee6..e49c887787c44 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -101,9 +101,9 @@ static u64 make_first_field(const char *text, u64 index)
 
 static u64 make_field(const char *text, u64 index)
 {
-	u64 n;
+	u64 n = 0;
 
-	strncpy((char *)&n, text, 8);
+	memcpy((char *)&n, text, strnlen(text, sizeof(n)));
 	return n + index;
 }
 

From 2479bfc9bc600dcce7f932d52dcfa8d677c41f93 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 29 May 2018 16:06:41 +0000
Subject: [PATCH 139/251] powerpc: Fix build by disabling attribute-alias
 warning for SYSCALL_DEFINEx

GCC 8.1 emits warnings such as the following. As arch/powerpc code is
built with -Werror, this breaks the build with GCC 8.1.

  In file included from arch/powerpc/kernel/pci_64.c:23:
  ./include/linux/syscalls.h:233:18: error: 'sys_pciconfig_iobase' alias
  between functions of incompatible types 'long int(long int, long
  unsigned int, long unsigned int)' and 'long int(long int, long int,
  long int)' [-Werror=attribute-alias]
    asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
                    ^~~
  ./include/linux/syscalls.h:222:2: note: in expansion of macro '__SYSCALL_DEFINEx'
    __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

This patch inhibits those warnings.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Trim change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/pci_64.c    | 4 ++++
 arch/powerpc/kernel/rtas.c      | 4 ++++
 arch/powerpc/kernel/signal_32.c | 8 ++++++++
 arch/powerpc/kernel/signal_64.c | 4 ++++
 arch/powerpc/kernel/syscalls.c  | 4 ++++
 arch/powerpc/mm/subpage-prot.c  | 4 ++++
 6 files changed, 28 insertions(+)

diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index dff28f9035124..812171c09f42f 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -203,6 +203,9 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose)
 #define IOBASE_ISA_IO		3
 #define IOBASE_ISA_MEM		4
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus,
 			  unsigned long, in_devfn)
 {
@@ -256,6 +259,7 @@ SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus,
 
 	return -EOPNOTSUPP;
 }
+#pragma GCC diagnostic pop
 
 #ifdef CONFIG_NUMA
 int pcibus_to_node(struct pci_bus *bus)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8afd146bc9c70..7fb9f83dcde88 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1051,6 +1051,9 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
 }
 
 /* We assume to be passed big endian arguments */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 {
 	struct rtas_args args;
@@ -1137,6 +1140,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 
 	return 0;
 }
+#pragma GCC diagnostic pop
 
 /*
  * Call early during boot, before mem init, to retrieve the RTAS
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 9cf8a03d3bc75..342ac78f620f5 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1037,6 +1037,9 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 }
 #endif
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 #ifdef CONFIG_PPC64
 COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		       struct ucontext __user *, new_ctx, int, ctx_size)
@@ -1132,6 +1135,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 }
+#pragma GCC diagnostic pop
 
 #ifdef CONFIG_PPC64
 COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
@@ -1228,6 +1232,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	return 0;
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 #ifdef CONFIG_PPC32
 SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
 			 int, ndbg, struct sig_dbg_op __user *, dbg)
@@ -1333,6 +1340,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
 	return 0;
 }
 #endif
+#pragma GCC diagnostic pop
 
 /*
  * OK, we're invoking a handler
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 83d51bf586c7e..d42b600203892 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -625,6 +625,9 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
 /*
  * Handle {get,set,swap}_context operations
  */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		struct ucontext __user *, new_ctx, long, ctx_size)
 {
@@ -690,6 +693,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 }
+#pragma GCC diagnostic pop
 
 
 /*
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 466216506eb2f..083fa06962fda 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -62,6 +62,9 @@ static inline long do_mmap2(unsigned long addr, size_t len,
 	return ret;
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
 		unsigned long, prot, unsigned long, flags,
 		unsigned long, fd, unsigned long, pgoff)
@@ -75,6 +78,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
 {
 	return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
 }
+#pragma GCC diagnostic pop
 
 #ifdef CONFIG_PPC32
 /*
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 9d16ee251fc01..75cb646a79c38 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -186,6 +186,9 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
  * in a 2-bit field won't allow writes to a page that is otherwise
  * write-protected.
  */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
 		unsigned long, len, u32 __user *, map)
 {
@@ -269,3 +272,4 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
 	up_write(&mm->mmap_sem);
 	return err;
 }
+#pragma GCC diagnostic pop

From 0abbf2bfdc9dec32e9832aa8d4522a57d698e753 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Thu, 31 May 2018 19:11:25 +0800
Subject: [PATCH 140/251] powerpc/xmon: use match_string() helper

match_string() returns the index of an array for a matching string,
which can be used instead of open coded variant.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index c2e9270728c75..d94a41254b11c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3173,7 +3173,7 @@ skipbl(void)
 }
 
 #define N_PTREGS	44
-static char *regnames[N_PTREGS] = {
+static const char *regnames[N_PTREGS] = {
 	"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
 	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
 	"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
@@ -3208,18 +3208,17 @@ scanhex(unsigned long *vp)
 			regname[i] = c;
 		}
 		regname[i] = 0;
-		for (i = 0; i < N_PTREGS; ++i) {
-			if (strcmp(regnames[i], regname) == 0) {
-				if (xmon_regs == NULL) {
-					printf("regs not available\n");
-					return 0;
-				}
-				*vp = ((unsigned long *)xmon_regs)[i];
-				return 1;
-			}
+		i = match_string(regnames, N_PTREGS, regname);
+		if (i < 0) {
+			printf("invalid register name '%%%s'\n", regname);
+			return 0;
 		}
-		printf("invalid register name '%%%s'\n", regname);
-		return 0;
+		if (xmon_regs == NULL) {
+			printf("regs not available\n");
+			return 0;
+		}
+		*vp = ((unsigned long *)xmon_regs)[i];
+		return 1;
 	}
 
 	/* skip leading "0x" if any */

From 98fd72fe82527fd26618062b60cfd329451f2329 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 30 May 2018 19:22:50 +1000
Subject: [PATCH 141/251] powerpc/powernv/ioda2: Remove redundant free of TCE
 pages

When IODA2 creates a PE, it creates an IOMMU table with it_ops::free
set to pnv_ioda2_table_free() which calls pnv_pci_ioda2_table_free_pages().

Since iommu_tce_table_put() calls it_ops::free when the last reference
to the table is released, explicit call to pnv_pci_ioda2_table_free_pages()
is not needed so let's remove it.

This should fix double free in the case of PCI hotuplug as
pnv_pci_ioda2_table_free_pages() does not reset neither
iommu_table::it_base nor ::it_size.

This was not exposed by SRIOV as it uses different code path via
pnv_pcibios_sriov_disable().

IODA1 does not inialize it_ops::free so it does not have this issue.

Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1b02f86dfec10..5bd0eb6681bcb 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3671,7 +3671,6 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 		WARN_ON(pe->table_group.group);
 	}
 
-	pnv_pci_ioda2_table_free_pages(tbl);
 	iommu_tce_table_put(tbl);
 }
 

From ebb37cf3ffd39fdb6ec5b07111f8bb2f11d92c5f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:25 +1000
Subject: [PATCH 142/251] powerpc/64: irq_work avoid interrupt when called with
 hardware irqs enabled

irq_work_raise should not cause a decrementer exception unless it is
called from NMI context. Doing so often just results in an immediate
masked decrementer interrupt:

   <...>-550    90d...    4us : update_curr_rt <-dequeue_task_rt
   <...>-550    90d...    5us : dbs_update_util_handler <-update_curr_rt
   <...>-550    90d...    6us : arch_irq_work_raise <-irq_work_queue
   <...>-550    90d...    7us : soft_nmi_interrupt <-soft_nmi_common
   <...>-550    90d...    7us : printk_nmi_enter <-soft_nmi_interrupt
   <...>-550    90d.Z.    8us : rcu_nmi_enter <-soft_nmi_interrupt
   <...>-550    90d.Z.    9us : rcu_nmi_exit <-soft_nmi_interrupt
   <...>-550    90d...    9us : printk_nmi_exit <-soft_nmi_interrupt
   <...>-550    90d...   10us : cpuacct_charge <-update_curr_rt

The soft_nmi_interrupt here is the call into the watchdog, due to the
decrementer interrupt firing with irqs soft-disabled. This is
harmless, but sub-optimal.

When it's not called from NMI context or with interrupts enabled, mark
the decrementer pending in the irq_happened mask directly, rather than
having the masked decrementer interupt handler do it. This will be
replayed at the next local_irq_enable. See the comment for details.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/time.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 360e71d455ccd..e7e8611e88633 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -513,6 +513,35 @@ static inline void clear_irq_work_pending(void)
 		"i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 
+void arch_irq_work_raise(void)
+{
+	preempt_disable();
+	set_irq_work_pending_flag();
+	/*
+	 * Non-nmi code running with interrupts disabled will replay
+	 * irq_happened before it re-enables interrupts, so setthe
+	 * decrementer there instead of causing a hardware exception
+	 * which would immediately hit the masked interrupt handler
+	 * and have the net effect of setting the decrementer in
+	 * irq_happened.
+	 *
+	 * NMI interrupts can not check this when they return, so the
+	 * decrementer hardware exception is raised, which will fire
+	 * when interrupts are next enabled.
+	 *
+	 * BookE does not support this yet, it must audit all NMI
+	 * interrupt handlers to ensure they call nmi_enter() so this
+	 * check would be correct.
+	 */
+	if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) {
+		set_dec(1);
+	} else {
+		hard_irq_disable();
+		local_paca->irq_happened |= PACA_IRQ_DEC;
+	}
+	preempt_enable();
+}
+
 #else /* 32-bit */
 
 DEFINE_PER_CPU(u8, irq_work_pending);
@@ -521,8 +550,6 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 #define test_irq_work_pending()		__this_cpu_read(irq_work_pending)
 #define clear_irq_work_pending()	__this_cpu_write(irq_work_pending, 0)
 
-#endif /* 32 vs 64 bit */
-
 void arch_irq_work_raise(void)
 {
 	preempt_disable();
@@ -531,6 +558,8 @@ void arch_irq_work_raise(void)
 	preempt_enable();
 }
 
+#endif /* 32 vs 64 bit */
+
 #else  /* CONFIG_IRQ_WORK */
 
 #define test_irq_work_pending()	0

From 9f4b61b2777dfd1692189ab3e38f8eb7dc669512 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:26 +1000
Subject: [PATCH 143/251] powerpc/pseries: put cede MSR[EE] check under
 IRQ_SOFT_MASK_DEBUG

This check does not catch IRQ soft mask bugs, but this option is
slightly more suitable than TRACE_IRQFLAGS.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/plpar_wrappers.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 96c1a46acbd06..cff5a411e5954 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -39,10 +39,10 @@ static inline long extended_cede_processor(unsigned long latency_hint)
 	set_cede_latency_hint(latency_hint);
 
 	rc = cede_processor();
-#ifdef CONFIG_TRACE_IRQFLAGS
-		/* Ensure that H_CEDE returns with IRQs on */
-		if (WARN_ON(!(mfmsr() & MSR_EE)))
-			__hard_irq_enable();
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+	/* Ensure that H_CEDE returns with IRQs on */
+	if (WARN_ON(!(mfmsr() & MSR_EE)))
+		__hard_irq_enable();
 #endif
 
 	set_cede_latency_hint(old_latency_hint);

From 54071e4176f0cedc39809f51cdbc78edd38ee77a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:28 +1000
Subject: [PATCH 144/251] powerpc/64s: micro-optimise __hard_irq_enable() for
 mtmsrd L=1 support

Book3S minimum supported ISA version now requires mtmsrd L=1. This
instruction does not require bits other than RI and EE to be supplied,
so __hard_irq_enable() and __hard_irq_disable() does not have to read
the kernel_msr from paca.

Interrupt entry code already relies on L=1 support.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/hw_irq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 855e17d158b11..3be8766427efe 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -228,8 +228,8 @@ static inline bool arch_irqs_disabled(void)
 #define __hard_irq_enable()	asm volatile("wrteei 1" : : : "memory")
 #define __hard_irq_disable()	asm volatile("wrteei 0" : : : "memory")
 #else
-#define __hard_irq_enable()	__mtmsrd(local_paca->kernel_msr | MSR_EE, 1)
-#define __hard_irq_disable()	__mtmsrd(local_paca->kernel_msr, 1)
+#define __hard_irq_enable()	__mtmsrd(MSR_EE|MSR_RI, 1)
+#define __hard_irq_disable()	__mtmsrd(MSR_RI, 1)
 #endif
 
 #define hard_irq_disable()	do {					\

From 36d632ea831fd2fa3cb62599a465825f59076f64 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:29 +1000
Subject: [PATCH 145/251] powerpc/64: remove start_tb and accum_tb from
 thread_struct

These fields are only written to.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/processor.h | 4 ----
 arch/powerpc/kernel/process.c        | 6 +-----
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c4b36a494a636..eff269adfa71c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -264,10 +264,6 @@ struct thread_struct {
 	struct thread_fp_state	*fp_save_area;
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;	/* alignment handling control */
-#ifdef CONFIG_PPC64
-	unsigned long	start_tb;	/* Start purr when proc switched in */
-	unsigned long	accum_tb;	/* Total accumulated purr for process */
-#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	struct perf_event *ptrace_bps[HBP_NUM];
 	/*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 25db000fa5b33..f4e5291584c55 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1188,11 +1188,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array);
-		long unsigned start_tb, current_tb;
-		start_tb = old_thread->start_tb;
-		cu->current_tb = current_tb = mfspr(SPRN_PURR);
-		old_thread->accum_tb += (current_tb - start_tb);
-		new_thread->start_tb = current_tb;
+		cu->current_tb = mfspr(SPRN_PURR);
 	}
 #endif /* CONFIG_PPC64 */
 

From 3d3a6021ddcbe9c31520e4e7b65e5ce5dc58274d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:30 +1000
Subject: [PATCH 146/251] powerpc/pseries: lparcfg calculate PURR on demand

For SPLPAR, lparcfg provides a sum of PURR registers for all CPUs.
Currently this is done by reading PURR in context switch and timer
interrupt, and storing that into a per-CPU variable. These are summed
to provide the value.

This does not work with all timer schemes (e.g., NO_HZ_FULL), and it
is sub-optimal for performance because it reads the PURR register on
every context switch, although that's been difficult to distinguish
from noise in the contxt_switch microbenchmark.

This patch implements the sum by calling a function on each CPU, to
read and add PURR values of each CPU.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/time.h          |  8 --------
 arch/powerpc/kernel/process.c            | 14 --------------
 arch/powerpc/kernel/time.c               |  8 --------
 arch/powerpc/platforms/pseries/lparcfg.c | 18 ++++++++++--------
 4 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index db546c034905a..c965c79765c4e 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -196,14 +196,6 @@ extern u64 mulhdu(u64, u64);
 extern void div128_by_32(u64 dividend_high, u64 dividend_low,
 			 unsigned divisor, struct div_result *dr);
 
-/* Used to store Processor Utilization register (purr) values */
-
-struct cpu_usage {
-        u64 current_tb;  /* Holds the current purr register values */
-};
-
-DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
-
 extern void secondary_cpu_time_init(void);
 extern void __init time_init(void);
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f4e5291584c55..2a7fa5000ccec 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -846,10 +846,6 @@ bool ppc_breakpoint_available(void)
 }
 EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
 
-#ifdef CONFIG_PPC64
-DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
-#endif
-
 static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
 			      struct arch_hw_breakpoint *b)
 {
@@ -1182,16 +1178,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	WARN_ON(!irqs_disabled());
 
-#ifdef CONFIG_PPC64
-	/*
-	 * Collect processor utilization data per process
-	 */
-	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array);
-		cu->current_tb = mfspr(SPRN_PURR);
-	}
-#endif /* CONFIG_PPC64 */
-
 #ifdef CONFIG_PPC_BOOK3S_64
 	batch = this_cpu_ptr(&ppc64_tlb_batch);
 	if (batch->active) {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e7e8611e88633..1fe6a24357e7d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -597,14 +597,6 @@ static void __timer_interrupt(void)
 		__this_cpu_inc(irq_stat.timer_irqs_others);
 	}
 
-#ifdef CONFIG_PPC64
-	/* collect purr register values often, for accurate calculations */
-	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array);
-		cu->current_tb = mfspr(SPRN_PURR);
-	}
-#endif
-
 	trace_timer_interrupt_exit(regs);
 }
 
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index c508c938dc71e..7c872dc01bdb0 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -52,18 +52,20 @@
  * Track sum of all purrs across all processors. This is used to further
  * calculate usage values by different applications
  */
+static void cpu_get_purr(void *arg)
+{
+	atomic64_t *sum = arg;
+
+	atomic64_add(mfspr(SPRN_PURR), sum);
+}
+
 static unsigned long get_purr(void)
 {
-	unsigned long sum_purr = 0;
-	int cpu;
+	atomic64_t purr = ATOMIC64_INIT(0);
 
-	for_each_possible_cpu(cpu) {
-		struct cpu_usage *cu;
+	on_each_cpu(cpu_get_purr, &purr, 1);
 
-		cu = &per_cpu(cpu_usage_array, cpu);
-		sum_purr += cu->current_tb;
-	}
-	return sum_purr;
+	return atomic64_read(&purr);
 }
 
 /*

From 3f984620f9a4fe089c0a3c951b75a460211394bb Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:31 +1000
Subject: [PATCH 147/251] powerpc: generic clockevents broadcast receiver call
 tick_receive_broadcast

The broadcast tick recipient can call tick_receive_broadcast rather
than re-running the full timer interrupt.

It does not have to check for the next event time, because the sender
already determined the timer has expired. It does not have to test
irq_work_pending, because that's a direct decrementer interrupt and
does not go through the clock events subsystem. And it does not have
to read PURR because that was removed with the previous patch.

This results in no code size change, but both the decrementer and
broadcast path lengths are reduced.

Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/hw_irq.h |  1 +
 arch/powerpc/include/asm/time.h   |  1 -
 arch/powerpc/kernel/smp.c         |  4 +-
 arch/powerpc/kernel/time.c        | 84 ++++++++++++++-----------------
 4 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3be8766427efe..9aec7237f8c24 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -55,6 +55,7 @@ extern void replay_system_reset(void);
 extern void __replay_interrupt(unsigned int vector);
 
 extern void timer_interrupt(struct pt_regs *);
+extern void timer_broadcast_interrupt(void);
 extern void performance_monitor_exception(struct pt_regs *regs);
 extern void WatchdogException(struct pt_regs *regs);
 extern void unknown_exception(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index c965c79765c4e..69b89f9412526 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -28,7 +28,6 @@ extern struct clock_event_device decrementer_clockevent;
 
 struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
-extern void tick_broadcast_ipi_handler(void);
 
 extern void generic_calibrate_decr(void);
 extern void hdec_interrupt(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c96f8fbc19423..f66eec89c14cb 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -158,7 +158,7 @@ static irqreturn_t reschedule_action(int irq, void *data)
 
 static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
 {
-	tick_broadcast_ipi_handler();
+	timer_broadcast_interrupt();
 	return IRQ_HANDLED;
 }
 
@@ -279,7 +279,7 @@ irqreturn_t smp_ipi_demux_relaxed(void)
 		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
 			scheduler_ipi();
 		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
-			tick_broadcast_ipi_handler();
+			timer_broadcast_interrupt();
 #ifdef CONFIG_NMI_IPI
 		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
 			nmi_ipi_action(0, NULL);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1fe6a24357e7d..ad876906f847b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -567,47 +567,16 @@ void arch_irq_work_raise(void)
 
 #endif /* CONFIG_IRQ_WORK */
 
-static void __timer_interrupt(void)
-{
-	struct pt_regs *regs = get_irq_regs();
-	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
-	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
-	u64 now;
-
-	trace_timer_interrupt_entry(regs);
-
-	if (test_irq_work_pending()) {
-		clear_irq_work_pending();
-		irq_work_run();
-	}
-
-	now = get_tb_or_rtc();
-	if (now >= *next_tb) {
-		*next_tb = ~(u64)0;
-		if (evt->event_handler)
-			evt->event_handler(evt);
-		__this_cpu_inc(irq_stat.timer_irqs_event);
-	} else {
-		now = *next_tb - now;
-		if (now <= decrementer_max)
-			set_dec(now);
-		/* We may have raced with new irq work */
-		if (test_irq_work_pending())
-			set_dec(1);
-		__this_cpu_inc(irq_stat.timer_irqs_others);
-	}
-
-	trace_timer_interrupt_exit(regs);
-}
-
 /*
  * timer_interrupt - gets called when the decrementer overflows,
  * with interrupts disabled.
  */
-void timer_interrupt(struct pt_regs * regs)
+void timer_interrupt(struct pt_regs *regs)
 {
-	struct pt_regs *old_regs;
+	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
 	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+	struct pt_regs *old_regs;
+	u64 now;
 
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
@@ -638,13 +607,47 @@ void timer_interrupt(struct pt_regs * regs)
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
+	trace_timer_interrupt_entry(regs);
+
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
+	}
+
+	now = get_tb_or_rtc();
+	if (now >= *next_tb) {
+		*next_tb = ~(u64)0;
+		if (evt->event_handler)
+			evt->event_handler(evt);
+		__this_cpu_inc(irq_stat.timer_irqs_event);
+	} else {
+		now = *next_tb - now;
+		if (now <= decrementer_max)
+			set_dec(now);
+		/* We may have raced with new irq work */
+		if (test_irq_work_pending())
+			set_dec(1);
+		__this_cpu_inc(irq_stat.timer_irqs_others);
+	}
 
-	__timer_interrupt();
+	trace_timer_interrupt_exit(regs);
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 EXPORT_SYMBOL(timer_interrupt);
 
+void timer_broadcast_interrupt(void)
+{
+	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+	struct pt_regs *regs = get_irq_regs();
+
+	trace_timer_interrupt_entry(regs);
+	*next_tb = ~(u64)0;
+	tick_receive_broadcast();
+	__this_cpu_inc(irq_stat.timer_irqs_event);
+	trace_timer_interrupt_exit(regs);
+}
+
 /*
  * Hypervisor decrementer interrupts shouldn't occur but are sometimes
  * left pending on exit from a KVM guest.  We don't need to do anything
@@ -992,15 +995,6 @@ static int decrementer_shutdown(struct clock_event_device *dev)
 	return 0;
 }
 
-/* Interrupt handler for the timer broadcast IPI */
-void tick_broadcast_ipi_handler(void)
-{
-	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
-
-	*next_tb = get_tb_or_rtc();
-	__timer_interrupt();
-}
-
 static void register_decrementer_clockevent(int cpu)
 {
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu);

From a7cba02deceda96df8018a827e6715d6f37be7b5 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:32 +1000
Subject: [PATCH 148/251] powerpc: allow soft-NMI watchdog to cover timer
 interrupts with large decrementers

Large decrementers (e.g., POWER9) can take a very long time to wrap,
so when the timer iterrupt handler sets the decrementer to max so as
to avoid taking another decrementer interrupt when hard enabling
interrupts before running timers, it effectively disables the soft
NMI coverage for timer interrupts.

Fix this by using the traditional 31-bit value instead, which wraps
after a few seconds. masked interrupt code does the same thing, and
in normal operation neither of these paths would ever wrap even the
31 bit value.

Note: the SMP watchdog should catch timer interrupt lockups, but it
is preferable for the local soft-NMI to catch them, mainly to avoid
the IPI.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/time.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ad876906f847b..5862a36117956 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -578,22 +578,29 @@ void timer_interrupt(struct pt_regs *regs)
 	struct pt_regs *old_regs;
 	u64 now;
 
-	/* Ensure a positive value is written to the decrementer, or else
-	 * some CPUs will continue to take decrementer exceptions.
-	 */
-	set_dec(decrementer_max);
-
 	/* Some implementations of hotplug will get timer interrupts while
 	 * offline, just ignore these and we also need to set
 	 * decrementers_next_tb as MAX to make sure __check_irq_replay
 	 * don't replay timer interrupt when return, otherwise we'll trap
 	 * here infinitely :(
 	 */
-	if (!cpu_online(smp_processor_id())) {
+	if (unlikely(!cpu_online(smp_processor_id()))) {
 		*next_tb = ~(u64)0;
+		set_dec(decrementer_max);
 		return;
 	}
 
+	/* Ensure a positive value is written to the decrementer, or else
+	 * some CPUs will continue to take decrementer exceptions. When the
+	 * PPC_WATCHDOG (decrementer based) is configured, keep this at most
+	 * 31 bits, which is about 4 seconds on most systems, which gives
+	 * the watchdog a chance of catching timer interrupt hard lockups.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+		set_dec(0x7fffffff);
+	else
+		set_dec(decrementer_max);
+
 	/* Conditionally hard-enable interrupts now that the DEC has been
 	 * bumped to its maximum value
 	 */

From bc9071133144acdbdb28cfc6ee5ce983d8fd5f81 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:33 +1000
Subject: [PATCH 149/251] powerpc: move timer broadcast code under
 GENERIC_CLOCKEVENTS_BROADCAST ifdef

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/smp.c  | 8 ++++++++
 arch/powerpc/kernel/time.c | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index f66eec89c14cb..6f5e3a6e259c0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -156,11 +156,13 @@ static irqreturn_t reschedule_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
 {
 	timer_broadcast_interrupt();
 	return IRQ_HANDLED;
 }
+#endif
 
 #ifdef CONFIG_NMI_IPI
 static irqreturn_t nmi_ipi_action(int irq, void *data)
@@ -173,7 +175,9 @@ static irqreturn_t nmi_ipi_action(int irq, void *data)
 static irq_handler_t smp_ipi_action[] = {
 	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
 	[PPC_MSG_RESCHEDULE] = reschedule_action,
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
+#endif
 #ifdef CONFIG_NMI_IPI
 	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
 #endif
@@ -187,7 +191,9 @@ static irq_handler_t smp_ipi_action[] = {
 const char *smp_ipi_name[] = {
 	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
 	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
+#endif
 	[PPC_MSG_NMI_IPI] = "nmi ipi",
 };
 
@@ -278,8 +284,10 @@ irqreturn_t smp_ipi_demux_relaxed(void)
 			generic_smp_call_function_interrupt();
 		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
 			scheduler_ipi();
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
 			timer_broadcast_interrupt();
+#endif
 #ifdef CONFIG_NMI_IPI
 		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
 			nmi_ipi_action(0, NULL);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 5862a36117956..23921f7b6e674 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -643,6 +643,7 @@ void timer_interrupt(struct pt_regs *regs)
 }
 EXPORT_SYMBOL(timer_interrupt);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 void timer_broadcast_interrupt(void)
 {
 	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
@@ -654,6 +655,7 @@ void timer_broadcast_interrupt(void)
 	__this_cpu_inc(irq_stat.timer_irqs_event);
 	trace_timer_interrupt_exit(regs);
 }
+#endif
 
 /*
  * Hypervisor decrementer interrupts shouldn't occur but are sometimes

From 21bfd6a8e9999f40f9eae09ca6ba33e7f75f0be4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:34 +1000
Subject: [PATCH 150/251] powerpc: move a stray NMI IPI case under NMI_IPI
 ifdef

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/smp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 6f5e3a6e259c0..b009a562c76b2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -194,7 +194,9 @@ const char *smp_ipi_name[] = {
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
 #endif
+#ifdef CONFIG_NMI_IPI
 	[PPC_MSG_NMI_IPI] = "nmi ipi",
+#endif
 };
 
 /* optional function to request ipi, for controllers with >= 4 ipis */

From e360cd37f0e9bac7b5f623132549e2d4b6417399 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 5 May 2018 03:19:35 +1000
Subject: [PATCH 151/251] powerpc/time: account broadcast timer event
 interrupts separately

These are not local timer interrupts but IPIs. It's good to be able
to see how timer offloading is behaving, so split these out into
their own category.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/hardirq.h | 1 +
 arch/powerpc/kernel/irq.c          | 6 ++++++
 arch/powerpc/kernel/time.c         | 5 +----
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 5986d473722b5..20b01897ea5d4 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -8,6 +8,7 @@
 typedef struct {
 	unsigned int __softirq_pending;
 	unsigned int timer_irqs_event;
+	unsigned int broadcast_irqs_event;
 	unsigned int timer_irqs_others;
 	unsigned int pmu_irqs;
 	unsigned int mce_exceptions;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bbf7ec582d601..0682fef1f3859 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -508,6 +508,11 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
         seq_printf(p, "  Local timer interrupts for timer event device\n");
 
+	seq_printf(p, "%*s: ", prec, "BCT");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event);
+	seq_printf(p, "  Broadcast timer interrupts for timer event device\n");
+
 	seq_printf(p, "%*s: ", prec, "LOC");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
@@ -567,6 +572,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 {
 	u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;
 
+	sum += per_cpu(irq_stat, cpu).broadcast_irqs_event;
 	sum += per_cpu(irq_stat, cpu).pmu_irqs;
 	sum += per_cpu(irq_stat, cpu).mce_exceptions;
 	sum += per_cpu(irq_stat, cpu).spurious_irqs;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 23921f7b6e674..ed6b2abdde15f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -647,13 +647,10 @@ EXPORT_SYMBOL(timer_interrupt);
 void timer_broadcast_interrupt(void)
 {
 	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
-	struct pt_regs *regs = get_irq_regs();
 
-	trace_timer_interrupt_entry(regs);
 	*next_tb = ~(u64)0;
 	tick_receive_broadcast();
-	__this_cpu_inc(irq_stat.timer_irqs_event);
-	trace_timer_interrupt_exit(regs);
+	__this_cpu_inc(irq_stat.broadcast_irqs_event);
 }
 #endif
 

From 81ea11d3af3aba0bea475dd1dd2f79977a761239 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 10 May 2018 11:04:23 +1000
Subject: [PATCH 152/251] powerpc/pmu/fsl: fix is_nmi test for irq mask change

When soft enabled was changed to irq disabled mask, this test missed
being converted (although the equivalent book3s test was converted).

The PMU drivers consider it an NMI when they take a PMI while general
interrupts are disabled. This change restores that behaviour.

Fixes: 01417c6cc7 ("powerpc/64: Change soft_enabled from flag to bitmask")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/core-fsl-emb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 85f1d18e5fd30..ba485844d506c 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -42,7 +42,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
 static inline int perf_intr_is_nmi(struct pt_regs *regs)
 {
 #ifdef __powerpc64__
-	return !regs->softe;
+	return (regs->softe & IRQS_DISABLED);
 #else
 	return 0;
 #endif

From 3130a7bb6eb595f2d963976a4d3e57db77bcf06f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 10 May 2018 11:04:24 +1000
Subject: [PATCH 153/251] powerpc/64: change softe to irqmask in show_regs and
 xmon

When the soft enabled flag was changed to a soft disable mask, xmon
and register dump code was not updated to reflect that, which is
confusing ('SOFTE: 1' previously meant interrupts were soft enabled,
currently it means the opposite, the general interrupt type has been
disabled).

Fix this by using the name irqmask, and printing it in hex.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 2 +-
 arch/powerpc/xmon/xmon.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 2a7fa5000ccec..8f35b30956f43 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1420,7 +1420,7 @@ void show_regs(struct pt_regs * regs)
 		pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 #endif
 #ifdef CONFIG_PPC64
-	pr_cont("SOFTE: %ld ", regs->softe);
+	pr_cont("IRQMASK: %lx ", regs->softe);
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (MSR_TM_ACTIVE(regs->msr))
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d94a41254b11c..0561c14b276b0 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1637,7 +1637,7 @@ static void excprint(struct pt_regs *fp)
 
 	printf("  current = 0x%px\n", current);
 #ifdef CONFIG_PPC64
-	printf("  paca    = 0x%px\t softe: %d\t irq_happened: 0x%02x\n",
+	printf("  paca    = 0x%px\t irqmask: 0x%02x\t irq_happened: 0x%02x\n",
 	       local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
 #endif
 	if (current) {

From ee03b9b4479d1302d01cebedda3518dc967697b7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 10 May 2018 22:21:48 +1000
Subject: [PATCH 154/251] powerpc/powernv: call OPAL_QUIESCE before
 OPAL_SIGNAL_SYSTEM_RESET

Although it is often possible to recover a CPU that was interrupted
from OPAL with a system reset NMI, it's undesirable to interrupt them
for a few reasons. Firstly because dump/debug code itself needs to
call firmware, so it could hang on a lock or possibly corrupt a
per-cpu data structure if it or another CPU was interrupted from
OPAL. Secondly, the kexec crash dump code will not return from
interrupt to unwind the OPAL call.

Call OPAL_QUIESCE with QUIESCE_HOLD before sending an NMI IPI to
another CPU, which wait for it to leave firmware (or time out) to
avoid this problem in normal conditions. Firmware bugs may still
result in a timeout and interrupting OPAL, but that is the best
option (stops the CPU, and possibly allows firmware to be debugged).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/opal-api.h            |  7 +++++++
 arch/powerpc/include/asm/opal.h                |  1 +
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 arch/powerpc/platforms/powernv/smp.c           | 17 ++++++++++++++++-
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index f34d173a2ebfe..3bab299eda491 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,6 +201,7 @@
 #define OPAL_SET_POWER_SHIFT_RATIO		155
 #define OPAL_SENSOR_GROUP_CLEAR			156
 #define OPAL_PCI_SET_P2P			157
+#define OPAL_QUIESCE				158
 #define OPAL_NPU_SPA_SETUP			159
 #define OPAL_NPU_SPA_CLEAR_CACHE		160
 #define OPAL_NPU_TL_SET				161
@@ -209,6 +210,12 @@
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
 #define OPAL_LAST				165
 
+#define QUIESCE_HOLD			1 /* Spin all calls at entry */
+#define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
+#define QUIESCE_LOCK_BREAK		3 /* Set to ignore locks. */
+#define QUIESCE_RESUME			4 /* Un-quiesce */
+#define QUIESCE_RESUME_FAST_REBOOT	5 /* Un-quiesce, fast reboot */
+
 /* Device tree flags */
 
 /*
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 3960def0e39c4..1dbeb6cd68faa 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -294,6 +294,7 @@ int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
 int opal_sensor_group_clear(u32 group_hndl, int token);
 
 s64 opal_signal_system_reset(s32 cpu);
+s64 opal_quiesce(u64 shutdown_type, s32 cpu);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8482df255969c..a8d9b4089c315 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,6 +320,7 @@ OPAL_CALL(opal_set_powercap,			OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,		OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,		OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear,		OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_quiesce,				OPAL_QUIESCE);
 OPAL_CALL(opal_npu_spa_setup,			OPAL_NPU_SPA_SETUP);
 OPAL_CALL(opal_npu_spa_clear_cache,		OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,			OPAL_NPU_TL_SET);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 19af6de6b6f00..b80909957792f 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -334,7 +334,16 @@ static int pnv_cause_nmi_ipi(int cpu)
 	int64_t rc;
 
 	if (cpu >= 0) {
-		rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
+		int h = get_hard_smp_processor_id(cpu);
+
+		if (opal_check_token(OPAL_QUIESCE))
+			opal_quiesce(QUIESCE_HOLD, h);
+
+		rc = opal_signal_system_reset(h);
+
+		if (opal_check_token(OPAL_QUIESCE))
+			opal_quiesce(QUIESCE_RESUME, h);
+
 		if (rc != OPAL_SUCCESS)
 			return 0;
 		return 1;
@@ -343,6 +352,8 @@ static int pnv_cause_nmi_ipi(int cpu)
 		bool success = true;
 		int c;
 
+		if (opal_check_token(OPAL_QUIESCE))
+			opal_quiesce(QUIESCE_HOLD, -1);
 
 		/*
 		 * We do not use broadcasts (yet), because it's not clear
@@ -358,6 +369,10 @@ static int pnv_cause_nmi_ipi(int cpu)
 			if (rc != OPAL_SUCCESS)
 				success = false;
 		}
+
+		if (opal_check_token(OPAL_QUIESCE))
+			opal_quiesce(QUIESCE_RESUME, -1);
+
 		if (success)
 			return 1;
 

From 56c0b48b1e443efa5d6f4d60513302c934e55b17 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 11 May 2018 03:20:05 +1000
Subject: [PATCH 155/251] powerpc/powernv: process all OPAL event interrupts
 with kopald

Using irq_work for processing OPAL event interrupts is not necessary.
irq_work is typically used to schedule work from NMI context, a
softirq may be more appropriate. However OPAL events are not
particularly performance or latency critical, so they can all be
invoked by kopald.

This patch removes the irq_work queueing, and instead wakes up
kopald when there is an event to be processed. kopald processes
interrupts individually, enabling irqs and calling cond_resched
between each one to minimise latencies.

Event handlers themselves should still use threaded handlers,
workqueues, etc. as necessary to avoid high interrupts-off latencies
within any single interrupt.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-irqchip.c | 87 +++++++++----------
 arch/powerpc/platforms/powernv/opal.c         | 23 +++--
 arch/powerpc/platforms/powernv/powernv.h      |  3 +-
 3 files changed, 52 insertions(+), 61 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 05ffe05f0fdc7..605c7e5d52c2c 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -22,7 +22,6 @@
 #include <linux/kthread.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/irq_work.h>
 
 #include <asm/machdep.h>
 #include <asm/opal.h>
@@ -38,37 +37,47 @@ struct opal_event_irqchip {
 	unsigned long mask;
 };
 static struct opal_event_irqchip opal_event_irqchip;
-
+static u64 last_outstanding_events;
 static unsigned int opal_irq_count;
 static unsigned int *opal_irqs;
 
-static void opal_handle_irq_work(struct irq_work *work);
-static u64 last_outstanding_events;
-static struct irq_work opal_event_irq_work = {
-	.func = opal_handle_irq_work,
-};
-
-void opal_handle_events(uint64_t events)
+void opal_handle_events(void)
 {
-	int virq, hwirq = 0;
-	u64 mask = opal_event_irqchip.mask;
+	__be64 events = 0;
+	u64 e;
+
+	e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask;
+again:
+	while (e) {
+		int virq, hwirq;
+
+		hwirq = fls64(e) - 1;
+		e &= ~BIT_ULL(hwirq);
+
+		local_irq_disable();
+		virq = irq_find_mapping(opal_event_irqchip.domain, hwirq);
+		if (virq) {
+			irq_enter();
+			generic_handle_irq(virq);
+			irq_exit();
+		}
+		local_irq_enable();
 
-	if (!in_irq() && (events & mask)) {
-		last_outstanding_events = events;
-		irq_work_queue(&opal_event_irq_work);
-		return;
+		cond_resched();
 	}
+	last_outstanding_events = 0;
+	if (opal_poll_events(&events) != OPAL_SUCCESS)
+		return;
+	e = be64_to_cpu(events) & opal_event_irqchip.mask;
+	if (e)
+		goto again;
+}
 
-	while (events & mask) {
-		hwirq = fls64(events) - 1;
-		if (BIT_ULL(hwirq) & mask) {
-			virq = irq_find_mapping(opal_event_irqchip.domain,
-						hwirq);
-			if (virq)
-				generic_handle_irq(virq);
-		}
-		events &= ~BIT_ULL(hwirq);
-	}
+bool opal_have_pending_events(void)
+{
+	if (last_outstanding_events & opal_event_irqchip.mask)
+		return true;
+	return false;
 }
 
 static void opal_event_mask(struct irq_data *d)
@@ -78,24 +87,9 @@ static void opal_event_mask(struct irq_data *d)
 
 static void opal_event_unmask(struct irq_data *d)
 {
-	__be64 events;
-
 	set_bit(d->hwirq, &opal_event_irqchip.mask);
-
-	opal_poll_events(&events);
-	last_outstanding_events = be64_to_cpu(events);
-
-	/*
-	 * We can't just handle the events now with opal_handle_events().
-	 * If we did we would deadlock when opal_event_unmask() is called from
-	 * handle_level_irq() with the irq descriptor lock held, because
-	 * calling opal_handle_events() would call generic_handle_irq() and
-	 * then handle_level_irq() which would try to take the descriptor lock
-	 * again. Instead queue the events for later.
-	 */
-	if (last_outstanding_events & opal_event_irqchip.mask)
-		/* Need to retrigger the interrupt */
-		irq_work_queue(&opal_event_irq_work);
+	if (opal_have_pending_events())
+		opal_wake_poller();
 }
 
 static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
@@ -136,16 +130,13 @@ static irqreturn_t opal_interrupt(int irq, void *data)
 	__be64 events;
 
 	opal_handle_interrupt(virq_to_hw(irq), &events);
-	opal_handle_events(be64_to_cpu(events));
+	last_outstanding_events = be64_to_cpu(events);
+	if (opal_have_pending_events())
+		opal_wake_poller();
 
 	return IRQ_HANDLED;
 }
 
-static void opal_handle_irq_work(struct irq_work *work)
-{
-	opal_handle_events(last_outstanding_events);
-}
-
 static int opal_event_match(struct irq_domain *h, struct device_node *node,
 			    enum irq_domain_bus_token bus_token)
 {
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 48fbb41af5d15..0d539c6617481 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -540,21 +540,15 @@ int opal_hmi_exception_early(struct pt_regs *regs)
 /* HMI exception handler called in virtual mode during check_irq_replay. */
 int opal_handle_hmi_exception(struct pt_regs *regs)
 {
-	s64 rc;
-	__be64 evt = 0;
-
 	/*
 	 * Check if HMI event is available.
-	 * if Yes, then call opal_poll_events to pull opal messages and
-	 * process them.
+	 * if Yes, then wake kopald to process them.
 	 */
 	if (!local_paca->hmi_event_available)
 		return 0;
 
 	local_paca->hmi_event_available = 0;
-	rc = opal_poll_events(&evt);
-	if (rc == OPAL_SUCCESS && evt)
-		opal_handle_events(be64_to_cpu(evt));
+	opal_wake_poller();
 
 	return 1;
 }
@@ -757,14 +751,19 @@ static void __init opal_imc_init_dev(void)
 static int kopald(void *unused)
 {
 	unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
-	__be64 events;
 
 	set_freezable();
 	do {
 		try_to_freeze();
-		opal_poll_events(&events);
-		opal_handle_events(be64_to_cpu(events));
-		schedule_timeout_interruptible(timeout);
+
+		opal_handle_events();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (opal_have_pending_events())
+			__set_current_state(TASK_RUNNING);
+		else
+			schedule_timeout(timeout);
+
 	} while (!kthread_should_stop());
 
 	return 0;
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 94f17ab1374bd..fd4a1c5a6369f 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -24,7 +24,8 @@ extern u32 pnv_get_supported_cpuidle_states(void);
 
 extern void pnv_lpc_init(void);
 
-extern void opal_handle_events(uint64_t events);
+extern void opal_handle_events(void);
+extern bool opal_have_pending_events(void);
 extern void opal_event_shutdown(void);
 
 bool cpu_core_split_required(void);

From 819844285ef2b5d15466f5b5062514135ffba06c Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:12:57 +1000
Subject: [PATCH 156/251] powerpc: Add TIDR CPU feature for POWER9

This patch adds a CPU feature bit to show whether the CPU has
the TIDR register available, enabling as_notify/wait in userspace.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Reviewed-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cputable.h | 3 ++-
 arch/powerpc/kernel/dt_cpu_ftrs.c   | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 66fcab13c8b42..9c0a3083571ba 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -215,6 +215,7 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_P9_TM_HV_ASSIST		LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
 #define CPU_FTR_P9_TLBIE_BUG		LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TIDR			LONG_ASM_CONST(0x0000800000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -462,7 +463,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
 	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
-	    CPU_FTR_P9_TLBIE_BUG)
+	    CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
 #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
 			     (~CPU_FTR_SAO))
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index c904477abaf38..4be1c0de9406b 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -717,6 +717,7 @@ static __init void cpufeatures_cpu_quirks(void)
 	if ((version & 0xffff0000) == 0x004e0000) {
 		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
 		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
 	}
 
 	/*

From 3449f191ca9be1a6ac9757b8ab55f239092362e5 Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:12:58 +1000
Subject: [PATCH 157/251] powerpc: Use TIDR CPU feature to control TIDR
 allocation

Switch the use of TIDR on it's CPU feature, rather than assuming it
is available based on architecture.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Reviewed-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8f35b30956f43..e8b1d3c306695 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1151,7 +1151,7 @@ static inline void restore_sprs(struct thread_struct *old_thread,
 			mtspr(SPRN_TAR, new_thread->tar);
 	}
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
 	    old_thread->tidr != new_thread->tidr)
 		mtspr(SPRN_TIDR, new_thread->tidr);
 #endif
@@ -1553,7 +1553,7 @@ void clear_thread_tidr(struct task_struct *t)
 	if (!t->thread.tidr)
 		return;
 
-	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+	if (!cpu_has_feature(CPU_FTR_P9_TIDR)) {
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -1576,7 +1576,7 @@ int set_thread_tidr(struct task_struct *t)
 {
 	int rc;
 
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+	if (!cpu_has_feature(CPU_FTR_P9_TIDR))
 		return -EINVAL;
 
 	if (t != current)

From 71cc64a85d8d99936f6851709a07f18c87a0adab Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:12:59 +1000
Subject: [PATCH 158/251] powerpc: use task_pid_nr() for TID allocation

The current implementation of TID allocation, using a global IDR, may
result in an errant process starving the system of available TIDs.
Instead, use task_pid_nr(), as mentioned by the original author. The
scenario described which prevented it's use is not applicable, as
set_thread_tidr can only be called after the task struct has been
populated.

In the unlikely event that 2 threads share the TID and are waiting,
all potential outcomes have been determined safe.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Reviewed-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/switch_to.h |   1 -
 arch/powerpc/kernel/process.c        | 122 ++++++---------------------
 2 files changed, 28 insertions(+), 95 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index be8c9fa239834..5b03d8a82409a 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -94,6 +94,5 @@ static inline void clear_task_ebb(struct task_struct *t)
 extern int set_thread_uses_vas(void);
 
 extern int set_thread_tidr(struct task_struct *t);
-extern void clear_thread_tidr(struct task_struct *t);
 
 #endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e8b1d3c306695..ebcd3956f2be2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1479,103 +1479,41 @@ int set_thread_uses_vas(void)
 }
 
 #ifdef CONFIG_PPC64
-static DEFINE_SPINLOCK(vas_thread_id_lock);
-static DEFINE_IDA(vas_thread_ida);
-
-/*
- * We need to assign a unique thread id to each thread in a process.
+/**
+ * Assign a TIDR (thread ID) for task @t and set it in the thread
+ * structure. For now, we only support setting TIDR for 'current' task.
  *
- * This thread id, referred to as TIDR, and separate from the Linux's tgid,
- * is intended to be used to direct an ASB_Notify from the hardware to the
- * thread, when a suitable event occurs in the system.
+ * Since the TID value is a truncated form of it PID, it is possible
+ * (but unlikely) for 2 threads to have the same TID. In the unlikely event
+ * that 2 threads share the same TID and are waiting, one of the following
+ * cases will happen:
  *
- * One such event is a "paste" instruction in the context of Fast Thread
- * Wakeup (aka Core-to-core wake up in the Virtual Accelerator Switchboard
- * (VAS) in POWER9.
+ * 1. The correct thread is running, the wrong thread is not
+ * In this situation, the correct thread is woken and proceeds to pass it's
+ * condition check.
  *
- * To get a unique TIDR per process we could simply reuse task_pid_nr() but
- * the problem is that task_pid_nr() is not yet available copy_thread() is
- * called. Fixing that would require changing more intrusive arch-neutral
- * code in code path in copy_process()?.
+ * 2. Neither threads are running
+ * In this situation, neither thread will be woken. When scheduled, the waiting
+ * threads will execute either a wait, which will return immediately, followed
+ * by a condition check, which will pass for the correct thread and fail
+ * for the wrong thread, or they will execute the condition check immediately.
  *
- * Further, to assign unique TIDRs within each process, we need an atomic
- * field (or an IDR) in task_struct, which again intrudes into the arch-
- * neutral code. So try to assign globally unique TIDRs for now.
+ * 3. The wrong thread is running, the correct thread is not
+ * The wrong thread will be woken, but will fail it's condition check and
+ * re-execute wait. The correct thread, when scheduled, will execute either
+ * it's condition check (which will pass), or wait, which returns immediately
+ * when called the first time after the thread is scheduled, followed by it's
+ * condition check (which will pass).
  *
- * NOTE: TIDR 0 indicates that the thread does not need a TIDR value.
- *	 For now, only threads that expect to be notified by the VAS
- *	 hardware need a TIDR value and we assign values > 0 for those.
- */
-#define MAX_THREAD_CONTEXT	((1 << 16) - 1)
-static int assign_thread_tidr(void)
-{
-	int index;
-	int err;
-	unsigned long flags;
-
-again:
-	if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
-		return -ENOMEM;
-
-	spin_lock_irqsave(&vas_thread_id_lock, flags);
-	err = ida_get_new_above(&vas_thread_ida, 1, &index);
-	spin_unlock_irqrestore(&vas_thread_id_lock, flags);
-
-	if (err == -EAGAIN)
-		goto again;
-	else if (err)
-		return err;
-
-	if (index > MAX_THREAD_CONTEXT) {
-		spin_lock_irqsave(&vas_thread_id_lock, flags);
-		ida_remove(&vas_thread_ida, index);
-		spin_unlock_irqrestore(&vas_thread_id_lock, flags);
-		return -ENOMEM;
-	}
-
-	return index;
-}
-
-static void free_thread_tidr(int id)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&vas_thread_id_lock, flags);
-	ida_remove(&vas_thread_ida, id);
-	spin_unlock_irqrestore(&vas_thread_id_lock, flags);
-}
-
-/*
- * Clear any TIDR value assigned to this thread.
- */
-void clear_thread_tidr(struct task_struct *t)
-{
-	if (!t->thread.tidr)
-		return;
-
-	if (!cpu_has_feature(CPU_FTR_P9_TIDR)) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	mtspr(SPRN_TIDR, 0);
-	free_thread_tidr(t->thread.tidr);
-	t->thread.tidr = 0;
-}
-
-void arch_release_task_struct(struct task_struct *t)
-{
-	clear_thread_tidr(t);
-}
-
-/*
- * Assign a unique TIDR (thread id) for task @t and set it in the thread
- * structure. For now, we only support setting TIDR for 'current' task.
+ * 4. Both threads are running
+ * Both threads will be woken. The wrong thread will fail it's condition check
+ * and execute another wait, while the correct thread will pass it's condition
+ * check.
+ *
+ * @t: the task to set the thread ID for
  */
 int set_thread_tidr(struct task_struct *t)
 {
-	int rc;
-
 	if (!cpu_has_feature(CPU_FTR_P9_TIDR))
 		return -EINVAL;
 
@@ -1585,11 +1523,7 @@ int set_thread_tidr(struct task_struct *t)
 	if (t->thread.tidr)
 		return 0;
 
-	rc = assign_thread_tidr();
-	if (rc < 0)
-		return rc;
-
-	t->thread.tidr = rc;
+	t->thread.tidr = (u16)task_pid_nr(t);
 	mtspr(SPRN_TIDR, t->thread.tidr);
 
 	return 0;

From 19df39581ce99eb1fcfb119945810c9c5bc3f8d4 Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:13:00 +1000
Subject: [PATCH 159/251] ocxl: Rename pnv_ocxl_spa_remove_pe to clarify it's
 action

The function removes the process element from NPU cache.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/pnv-ocxl.h   | 2 +-
 arch/powerpc/platforms/powernv/ocxl.c | 4 ++--
 drivers/misc/ocxl/link.c              | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index f6945d3bc9710..208b5503f4edd 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -28,7 +28,7 @@ extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
 extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
 			void **platform_data);
 extern void pnv_ocxl_spa_release(void *platform_data);
-extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle);
+extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
 
 extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
 extern void pnv_ocxl_free_xive_irq(u32 irq);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index fa9b53af3c7b1..8c65aacda9c81 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -475,7 +475,7 @@ void pnv_ocxl_spa_release(void *platform_data)
 }
 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
 
-int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
 {
 	struct spa_data *data = (struct spa_data *) platform_data;
 	int rc;
@@ -483,7 +483,7 @@ int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
 	rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe);
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
 
 int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
 {
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index f30790582dc0a..656e8610eec26 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -599,7 +599,7 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
 	 * On powerpc, the entry needs to be cleared from the context
 	 * cache of the NPU.
 	 */
-	rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
+	rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
 	WARN_ON(rc);
 
 	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);

From e948e06fc63a1c1e36ec4c8e5c510b881ff19c26 Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:13:01 +1000
Subject: [PATCH 160/251] ocxl: Expose the thread_id needed for wait on POWER9

In order to successfully issue as_notify, an AFU needs to know the TID
to notify, which in turn means that this information should be
available in userspace so it can be communicated to the AFU.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/ocxl/context.c       |  5 ++-
 drivers/misc/ocxl/file.c          | 53 +++++++++++++++++++++++++++++++
 drivers/misc/ocxl/link.c          | 36 +++++++++++++++++++++
 drivers/misc/ocxl/ocxl_internal.h |  1 +
 include/misc/ocxl.h               |  9 ++++++
 include/uapi/misc/ocxl.h          |  8 +++++
 6 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index 909e8807824a9..95f74623113e2 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -34,6 +34,8 @@ int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
 	mutex_init(&ctx->xsl_error_lock);
 	mutex_init(&ctx->irq_lock);
 	idr_init(&ctx->irq_idr);
+	ctx->tidr = 0;
+
 	/*
 	 * Keep a reference on the AFU to make sure it's valid for the
 	 * duration of the life of the context
@@ -65,6 +67,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
 {
 	int rc;
 
+	// Locks both status & tidr
 	mutex_lock(&ctx->status_mutex);
 	if (ctx->status != OPENED) {
 		rc = -EIO;
@@ -72,7 +75,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
 	}
 
 	rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
-			current->mm->context.id, 0, amr, current->mm,
+			current->mm->context.id, ctx->tidr, amr, current->mm,
 			xsl_fault_error, ctx);
 	if (rc)
 		goto out;
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index 038509e5d031f..eb409a469f217 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -5,6 +5,8 @@
 #include <linux/sched/signal.h>
 #include <linux/uaccess.h>
 #include <uapi/misc/ocxl.h>
+#include <asm/reg.h>
+#include <asm/switch_to.h>
 #include "ocxl_internal.h"
 
 
@@ -123,11 +125,55 @@ static long afu_ioctl_get_metadata(struct ocxl_context *ctx,
 	return 0;
 }
 
+#ifdef CONFIG_PPC64
+static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx,
+		struct ocxl_ioctl_p9_wait __user *uarg)
+{
+	struct ocxl_ioctl_p9_wait arg;
+
+	memset(&arg, 0, sizeof(arg));
+
+	if (cpu_has_feature(CPU_FTR_P9_TIDR)) {
+		enum ocxl_context_status status;
+
+		// Locks both status & tidr
+		mutex_lock(&ctx->status_mutex);
+		if (!ctx->tidr) {
+			if (set_thread_tidr(current))
+				return -ENOENT;
+
+			ctx->tidr = current->thread.tidr;
+		}
+
+		status = ctx->status;
+		mutex_unlock(&ctx->status_mutex);
+
+		if (status == ATTACHED) {
+			int rc;
+			struct link *link = ctx->afu->fn->link;
+
+			rc = ocxl_link_update_pe(link, ctx->pasid, ctx->tidr);
+			if (rc)
+				return rc;
+		}
+
+		arg.thread_id = ctx->tidr;
+	} else
+		return -ENOENT;
+
+	if (copy_to_user(uarg, &arg, sizeof(arg)))
+		return -EFAULT;
+
+	return 0;
+}
+#endif
+
 #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :			\
 			x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :	\
 			x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :		\
 			x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :	\
 			x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" :	\
+			x == OCXL_IOCTL_ENABLE_P9_WAIT ? "ENABLE_P9_WAIT" :	\
 			"UNKNOWN")
 
 static long afu_ioctl(struct file *file, unsigned int cmd,
@@ -186,6 +232,13 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
 				(struct ocxl_ioctl_metadata __user *) args);
 		break;
 
+#ifdef CONFIG_PPC64
+	case OCXL_IOCTL_ENABLE_P9_WAIT:
+		rc = afu_ioctl_enable_p9_wait(ctx,
+				(struct ocxl_ioctl_p9_wait __user *) args);
+		break;
+#endif
+
 	default:
 		rc = -EINVAL;
 	}
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 656e8610eec26..88876ae8f3300 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -544,6 +544,42 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
 }
 EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
 
+int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
+{
+	struct link *link = (struct link *) link_handle;
+	struct spa *spa = link->spa;
+	struct ocxl_process_element *pe;
+	int pe_handle, rc;
+
+	if (pasid > SPA_PASID_MAX)
+		return -EINVAL;
+
+	pe_handle = pasid & SPA_PE_MASK;
+	pe = spa->spa_mem + pe_handle;
+
+	mutex_lock(&spa->spa_lock);
+
+	pe->tid = tid;
+
+	/*
+	 * The barrier makes sure the PE is updated
+	 * before we clear the NPU context cache below, so that the
+	 * old PE cannot be reloaded erroneously.
+	 */
+	mb();
+
+	/*
+	 * hook to platform code
+	 * On powerpc, the entry needs to be cleared from the context
+	 * cache of the NPU.
+	 */
+	rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
+	WARN_ON(rc);
+
+	mutex_unlock(&spa->spa_lock);
+	return rc;
+}
+
 int ocxl_link_remove_pe(void *link_handle, int pasid)
 {
 	struct link *link = (struct link *) link_handle;
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
index 5d421824afd92..a32f2151029f6 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -77,6 +77,7 @@ struct ocxl_context {
 	struct ocxl_xsl_error xsl_error;
 	struct mutex irq_lock;
 	struct idr irq_idr;
+	u16 tidr; // Thread ID used for P9 wait implementation
 };
 
 struct ocxl_process_element {
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 51ccf76db2936..9ff6ddc28e221 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -188,6 +188,15 @@ extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
 		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
 		void *xsl_err_data);
 
+/**
+ * Update values within a Process Element
+ *
+ * link_handle: the link handle associated with the process element
+ * pasid: the PASID for the AFU context
+ * tid: the new thread id for the process element
+ */
+extern int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
+
 /*
  * Remove a Process Element from the Shared Process Area for a link
  */
diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
index 0af83d80fb3ea..561e6f0dfcb76 100644
--- a/include/uapi/misc/ocxl.h
+++ b/include/uapi/misc/ocxl.h
@@ -48,6 +48,13 @@ struct ocxl_ioctl_metadata {
 	__u64 reserved[13]; // Total of 16*u64
 };
 
+struct ocxl_ioctl_p9_wait {
+	__u16 thread_id; // The thread ID required to wake this thread
+	__u16 reserved1;
+	__u32 reserved2;
+	__u64 reserved3[3];
+};
+
 struct ocxl_ioctl_irq_fd {
 	__u64 irq_offset;
 	__s32 eventfd;
@@ -62,5 +69,6 @@ struct ocxl_ioctl_irq_fd {
 #define OCXL_IOCTL_IRQ_FREE	_IOW(OCXL_MAGIC, 0x12, __u64)
 #define OCXL_IOCTL_IRQ_SET_FD	_IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
 #define OCXL_IOCTL_GET_METADATA _IOR(OCXL_MAGIC, 0x14, struct ocxl_ioctl_metadata)
+#define OCXL_IOCTL_ENABLE_P9_WAIT	_IOR(OCXL_MAGIC, 0x15, struct ocxl_ioctl_p9_wait)
 
 #endif /* _UAPI_MISC_OCXL_H */

From 02a8e5bc1c06045f36423bd9632ad9f40da18d3f Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:13:02 +1000
Subject: [PATCH 161/251] ocxl: Add an IOCTL so userspace knows what OCXL
 features are available

In order for a userspace AFU driver to call the POWER9 specific
OCXL_IOCTL_ENABLE_P9_WAIT, it needs to verify that it can actually
make that call.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/ocxl/file.c | 25 +++++++++++++++++++++++++
 include/uapi/misc/ocxl.h |  6 ++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index eb409a469f217..33ae46ce0a8ae 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -168,12 +168,32 @@ static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx,
 }
 #endif
 
+
+static long afu_ioctl_get_features(struct ocxl_context *ctx,
+		struct ocxl_ioctl_features __user *uarg)
+{
+	struct ocxl_ioctl_features arg;
+
+	memset(&arg, 0, sizeof(arg));
+
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		arg.flags[0] |= OCXL_IOCTL_FEATURES_FLAGS0_P9_WAIT;
+#endif
+
+	if (copy_to_user(uarg, &arg, sizeof(arg)))
+		return -EFAULT;
+
+	return 0;
+}
+
 #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :			\
 			x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :	\
 			x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :		\
 			x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :	\
 			x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" :	\
 			x == OCXL_IOCTL_ENABLE_P9_WAIT ? "ENABLE_P9_WAIT" :	\
+			x == OCXL_IOCTL_GET_FEATURES ? "GET_FEATURES" :	\
 			"UNKNOWN")
 
 static long afu_ioctl(struct file *file, unsigned int cmd,
@@ -239,6 +259,11 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
 		break;
 #endif
 
+	case OCXL_IOCTL_GET_FEATURES:
+		rc = afu_ioctl_get_features(ctx,
+				(struct ocxl_ioctl_features __user *) args);
+		break;
+
 	default:
 		rc = -EINVAL;
 	}
diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
index 561e6f0dfcb76..97937cfa3baae 100644
--- a/include/uapi/misc/ocxl.h
+++ b/include/uapi/misc/ocxl.h
@@ -55,6 +55,11 @@ struct ocxl_ioctl_p9_wait {
 	__u64 reserved3[3];
 };
 
+#define OCXL_IOCTL_FEATURES_FLAGS0_P9_WAIT	0x01
+struct ocxl_ioctl_features {
+	__u64 flags[4];
+};
+
 struct ocxl_ioctl_irq_fd {
 	__u64 irq_offset;
 	__s32 eventfd;
@@ -70,5 +75,6 @@ struct ocxl_ioctl_irq_fd {
 #define OCXL_IOCTL_IRQ_SET_FD	_IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
 #define OCXL_IOCTL_GET_METADATA _IOR(OCXL_MAGIC, 0x14, struct ocxl_ioctl_metadata)
 #define OCXL_IOCTL_ENABLE_P9_WAIT	_IOR(OCXL_MAGIC, 0x15, struct ocxl_ioctl_p9_wait)
+#define OCXL_IOCTL_GET_FEATURES _IOR(OCXL_MAGIC, 0x16, struct ocxl_ioctl_features)
 
 #endif /* _UAPI_MISC_OCXL_H */

From 721c551d31fb441ff3be701ad3be14cf6e0aca3f Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Fri, 11 May 2018 16:13:03 +1000
Subject: [PATCH 162/251] ocxl: Document new OCXL IOCTLs

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/accelerators/ocxl.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
index ddcc58d01cfbc..14cefc020e2d5 100644
--- a/Documentation/accelerators/ocxl.rst
+++ b/Documentation/accelerators/ocxl.rst
@@ -157,6 +157,17 @@ OCXL_IOCTL_GET_METADATA:
   Obtains configuration information from the card, such at the size of
   MMIO areas, the AFU version, and the PASID for the current context.
 
+OCXL_IOCTL_ENABLE_P9_WAIT:
+
+  Allows the AFU to wake a userspace thread executing 'wait'. Returns
+  information to userspace to allow it to configure the AFU. Note that
+  this is only available on POWER9.
+
+OCXL_IOCTL_GET_FEATURES:
+
+  Reports on which CPU features that affect OpenCAPI are usable from
+  userspace.
+
 
 mmap
 ----

From f069ff396d657ac7bdb5de866c3ec28b8d08d953 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 29 May 2018 19:58:38 +0530
Subject: [PATCH 163/251] powerpc/mm/hugetlb: Update huge_ptep_set_access_flags
 to call __ptep_set_access_flags directly

In a later patch, we want to update __ptep_set_access_flags take page size
arg. This makes ptep_set_access_flags only work with mmu_virtual_psize.
To simplify the code make huge_ptep_set_access_flags directly call
__ptep_set_access_flags so that we can compute the hugetlb page size in
hugetlb function.

Now that ptep_set_access_flags won't be called for hugetlb remove
the is_vm_hugetlb_page() check and add the assert of pte lock
unconditionally.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/hugetlb.h | 19 +++--------------
 arch/powerpc/mm/pgtable.c          | 33 ++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 96444bc08034c..3225eb6402cc6 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -166,22 +166,9 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
 	return pte_wrprotect(pte);
 }
 
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-#ifdef HUGETLB_NEED_PRELOAD
-	/*
-	 * The "return 1" forces a call of update_mmu_cache, which will write a
-	 * TLB entry.  Without this, platforms that don't do a write of the TLB
-	 * entry in the TLB miss handler asm will fault ad infinitum.
-	 */
-	ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-	return 1;
-#else
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-#endif
-}
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+				      unsigned long addr, pte_t *ptep,
+				      pte_t pte, int dirty);
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 9f361ae571e95..e70af99393793 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -221,14 +221,43 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	entry = set_access_flags_filter(entry, vma, dirty);
 	changed = !pte_same(*(ptep), entry);
 	if (changed) {
-		if (!is_vm_hugetlb_page(vma))
-			assert_pte_locked(vma->vm_mm, address);
+		assert_pte_locked(vma->vm_mm, address);
 		__ptep_set_access_flags(vma->vm_mm, ptep, entry, address);
 		flush_tlb_page(vma, address);
 	}
 	return changed;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+				      unsigned long addr, pte_t *ptep,
+				      pte_t pte, int dirty)
+{
+#ifdef HUGETLB_NEED_PRELOAD
+	/*
+	 * The "return 1" forces a call of update_mmu_cache, which will write a
+	 * TLB entry.  Without this, platforms that don't do a write of the TLB
+	 * entry in the TLB miss handler asm will fault ad infinitum.
+	 */
+	ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	return 1;
+#else
+	int changed;
+
+	pte = set_access_flags_filter(pte, vma, dirty);
+	changed = !pte_same(*(ptep), pte);
+	if (changed) {
+#ifdef CONFIG_DEBUG_VM
+		assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+		__ptep_set_access_flags(vma->vm_mm, ptep, pte, addr);
+		flush_hugetlb_page(vma, addr);
+	}
+	return changed;
+#endif
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
 #ifdef CONFIG_DEBUG_VM
 void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 {

From 044003b52a78bcbda7103633c351da16505096cf Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 29 May 2018 19:58:39 +0530
Subject: [PATCH 164/251] powerpc/mm/radix: Move function from radix.h to
 pgtable-radix.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In later patch we will update them which require them to be moved
to pgtable-radix.c. Keeping the function in radix.h results in
compile warning as below.

./arch/powerpc/include/asm/book3s/64/radix.h: In function ‘radix__ptep_set_access_flags’:
./arch/powerpc/include/asm/book3s/64/radix.h:196:28: error: dereferencing pointer to incomplete type ‘struct vm_area_struct’
  struct mm_struct *mm = vma->vm_mm;
                            ^~
./arch/powerpc/include/asm/book3s/64/radix.h:204:6: error: implicit declaration of function ‘atomic_read’; did you mean ‘__atomic_load’? [-Werror=implicit-function-declaration]
      atomic_read(&mm->context.copros) > 0) {
      ^~~~~~~~~~~
      __atomic_load
./arch/powerpc/include/asm/book3s/64/radix.h:204:21: error: dereferencing pointer to incomplete type ‘struct mm_struct’
      atomic_read(&mm->context.copros) > 0) {

Instead of fixing header dependencies, we move the function to pgtable-radix.c
Also the function is now large to be a static inline . Doing the
move in separate patch helps in review.

No functional change in this patch. Only code movement.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 31 +++-------------------
 arch/powerpc/mm/pgtable-radix.c            | 22 +++++++++++++++
 2 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 705193e7192fb..36ed025b4e139 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -124,6 +124,9 @@ extern void radix__mark_rodata_ro(void);
 extern void radix__mark_initmem_nx(void);
 #endif
 
+extern void radix__ptep_set_access_flags(struct mm_struct *mm, pte_t *ptep,
+					 pte_t entry, unsigned long address);
+
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
@@ -190,34 +193,6 @@ static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
 	return __pte(old_pte);
 }
 
-/*
- * Set the dirty and/or accessed bits atomically in a linux PTE, this
- * function doesn't need to invalidate tlb.
- */
-static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
-						pte_t *ptep, pte_t entry,
-						unsigned long address)
-{
-
-	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
-					      _PAGE_RW | _PAGE_EXEC);
-
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-
-		unsigned long old_pte, new_pte;
-
-		old_pte = __radix_pte_update(ptep, ~0, 0);
-		/*
-		 * new value of pte
-		 */
-		new_pte = old_pte | set;
-		radix__flush_tlb_pte_p9_dd1(old_pte, mm, address);
-		__radix_pte_update(ptep, 0, new_pte);
-	} else
-		__radix_pte_update(ptep, 0, set);
-	asm volatile("ptesync" : : : "memory");
-}
-
 static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
 {
 	return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ce24d72ea6799..a6eec41dd3475 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1084,3 +1084,25 @@ int radix__has_transparent_hugepage(void)
 	return 0;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+void radix__ptep_set_access_flags(struct mm_struct *mm,
+				  pte_t *ptep, pte_t entry,
+				  unsigned long address)
+{
+	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
+					      _PAGE_RW | _PAGE_EXEC);
+
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+		unsigned long old_pte, new_pte;
+
+		old_pte = __radix_pte_update(ptep, ~0, 0);
+		/*
+		 * new value of pte
+		 */
+		new_pte = old_pte | set;
+		radix__flush_tlb_pte_p9_dd1(old_pte, mm, address);
+		__radix_pte_update(ptep, 0, new_pte);
+	} else
+		__radix_pte_update(ptep, 0, set);
+	asm volatile("ptesync" : : : "memory");
+}

From e4c1112c3fc503fc78379fa61450bfda3f0717fe Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 29 May 2018 19:58:40 +0530
Subject: [PATCH 165/251] powerpc/mm: Change function prototype

In later patch, we use the vma and psize to do tlb flush. Do the prototype
update in separate patch to make the review easy.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  5 +++--
 arch/powerpc/include/asm/book3s/64/pgtable.h |  8 +++++---
 arch/powerpc/include/asm/book3s/64/radix.h   |  5 +++--
 arch/powerpc/include/asm/nohash/32/pgtable.h |  5 +++--
 arch/powerpc/include/asm/nohash/64/pgtable.h |  5 +++--
 arch/powerpc/mm/pgtable-book3s64.c           |  8 ++++++--
 arch/powerpc/mm/pgtable-radix.c              |  6 +++---
 arch/powerpc/mm/pgtable.c                    | 18 +++++++++++++++---
 8 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index c615abdce119e..39d3a4245694a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -235,9 +235,10 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 }
 
 
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
-					   unsigned long address)
+					   unsigned long address,
+					   int psize)
 {
 	unsigned long set = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c233915abb68e..42fe7c2ff2df9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -767,12 +767,14 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
  * Generic functions with hash/radix callbacks
  */
 
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
-					   unsigned long address)
+					   unsigned long address,
+					   int psize)
 {
 	if (radix_enabled())
-		return radix__ptep_set_access_flags(mm, ptep, entry, address);
+		return radix__ptep_set_access_flags(vma, ptep, entry,
+						    address, psize);
 	return hash__ptep_set_access_flags(ptep, entry);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 36ed025b4e139..62a73a7a78a40 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -124,8 +124,9 @@ extern void radix__mark_rodata_ro(void);
 extern void radix__mark_initmem_nx(void);
 #endif
 
-extern void radix__ptep_set_access_flags(struct mm_struct *mm, pte_t *ptep,
-					 pte_t entry, unsigned long address);
+extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+					 pte_t entry, unsigned long address,
+					 int psize);
 
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 987a658b18e1c..c2471bac86b9d 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -256,9 +256,10 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 }
 
 
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
-					   unsigned long address)
+					   unsigned long address,
+					   int psize)
 {
 	unsigned long set = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index de78eda5f8414..180161d714fb2 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -281,9 +281,10 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 /* Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to flush the hash entry
  */
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
-					   unsigned long address)
+					   unsigned long address,
+					   int psize)
 {
 	unsigned long bits = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index abda2b92f1baa..4a8150481a889 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -46,8 +46,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 #endif
 	changed = !pmd_same(*(pmdp), entry);
 	if (changed) {
-		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp),
-					pmd_pte(entry), address);
+		/*
+		 * We can use MMU_PAGE_2M here, because only radix
+		 * path look at the psize.
+		 */
+		__ptep_set_access_flags(vma, pmdp_ptep(pmdp),
+					pmd_pte(entry), address, MMU_PAGE_2M);
 		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	}
 	return changed;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index a6eec41dd3475..2034cbc9aa560 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1085,10 +1085,10 @@ int radix__has_transparent_hugepage(void)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-void radix__ptep_set_access_flags(struct mm_struct *mm,
-				  pte_t *ptep, pte_t entry,
-				  unsigned long address)
+void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+				  pte_t entry, unsigned long address, int psize)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
 					      _PAGE_RW | _PAGE_EXEC);
 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index e70af99393793..20cacd33e5be2 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -222,7 +222,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	changed = !pte_same(*(ptep), entry);
 	if (changed) {
 		assert_pte_locked(vma->vm_mm, address);
-		__ptep_set_access_flags(vma->vm_mm, ptep, entry, address);
+		__ptep_set_access_flags(vma, ptep, entry,
+					address, mmu_virtual_psize);
 		flush_tlb_page(vma, address);
 	}
 	return changed;
@@ -242,15 +243,26 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	ptep_set_access_flags(vma, addr, ptep, pte, dirty);
 	return 1;
 #else
-	int changed;
+	int changed, psize;
 
 	pte = set_access_flags_filter(pte, vma, dirty);
 	changed = !pte_same(*(ptep), pte);
 	if (changed) {
+
+#ifdef CONFIG_PPC_BOOK3S_64
+		struct hstate *hstate = hstate_file(vma->vm_file);
+		psize = hstate_get_psize(hstate);
+#else
+		/*
+		 * Not used on non book3s64 platforms. But 8xx
+		 * can possibly use tsize derived from hstate.
+		 */
+		psize = 0;
+#endif
 #ifdef CONFIG_DEBUG_VM
 		assert_spin_locked(&vma->vm_mm->page_table_lock);
 #endif
-		__ptep_set_access_flags(vma->vm_mm, ptep, pte, addr);
+		__ptep_set_access_flags(vma, ptep, pte, addr, psize);
 		flush_hugetlb_page(vma, addr);
 	}
 	return changed;

From bd5050e38aec3055ff4257ade987d808ac93b582 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 29 May 2018 19:58:41 +0530
Subject: [PATCH 166/251] powerpc/mm/radix: Change pte relax sequence to handle
 nest MMU hang

When relaxing access (read -> read_write update), pte needs to be marked invalid
to handle a nest MMU bug. We also need to do a tlb flush after the pte is
marked invalid before updating the pte with new access bits.

We also move tlb flush to platform specific __ptep_set_access_flags. This will
help us to gerid of unnecessary tlb flush on BOOK3S 64 later. We don't do that
in this patch. This also helps in avoiding multiple tlbies with coprocessor
attached.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  2 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  2 ++
 arch/powerpc/include/asm/nohash/64/pgtable.h |  2 ++
 arch/powerpc/include/asm/pgtable.h           |  1 +
 arch/powerpc/mm/pgtable-book3s64.c           |  1 -
 arch/powerpc/mm/pgtable-radix.c              | 14 ++++++++++----
 arch/powerpc/mm/pgtable.c                    |  2 --
 7 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 39d3a4245694a..02f5acd7ccc4d 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -245,6 +245,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 	unsigned long clr = ~pte_val(entry) & _PAGE_RO;
 
 	pte_update(ptep, clr, set);
+
+	flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_PTE_SAME
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index c2471bac86b9d..7c46a98cc7f47 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -266,6 +266,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 	unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
 
 	pte_update(ptep, clr, set);
+
+	flush_tlb_page(vma, address);
 }
 
 static inline int pte_young(pte_t pte)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 180161d714fb2..dd0c7236208f2 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -304,6 +304,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 	unsigned long old = pte_val(*ptep);
 	*ptep = __pte(old | bits);
 #endif
+
+	flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_PTE_SAME
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ab7d2d996be4d..14c79a7dc8550 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -8,6 +8,7 @@
 #include <asm/processor.h>		/* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
+#include <asm/tlbflush.h>
 
 struct mm_struct;
 
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 4a8150481a889..82fed87289de8 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -52,7 +52,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 		 */
 		__ptep_set_access_flags(vma, pmdp_ptep(pmdp),
 					pmd_pte(entry), address, MMU_PAGE_2M);
-		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	}
 	return changed;
 }
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 2034cbc9aa560..0ddfe591cd246 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1091,8 +1091,12 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
 					      _PAGE_RW | _PAGE_EXEC);
-
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+	/*
+	 * To avoid NMMU hang while relaxing access, we need mark
+	 * the pte invalid in between.
+	 */
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
+	    atomic_read(&mm->context.copros) > 0) {
 		unsigned long old_pte, new_pte;
 
 		old_pte = __radix_pte_update(ptep, ~0, 0);
@@ -1100,9 +1104,11 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 		 * new value of pte
 		 */
 		new_pte = old_pte | set;
-		radix__flush_tlb_pte_p9_dd1(old_pte, mm, address);
+		radix__flush_tlb_page_psize(mm, address, psize);
 		__radix_pte_update(ptep, 0, new_pte);
-	} else
+	} else {
 		__radix_pte_update(ptep, 0, set);
+		radix__flush_tlb_page_psize(mm, address, psize);
+	}
 	asm volatile("ptesync" : : : "memory");
 }
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 20cacd33e5be2..5281c2c064af2 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,7 +224,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 		assert_pte_locked(vma->vm_mm, address);
 		__ptep_set_access_flags(vma, ptep, entry,
 					address, mmu_virtual_psize);
-		flush_tlb_page(vma, address);
 	}
 	return changed;
 }
@@ -263,7 +262,6 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 		assert_spin_locked(&vma->vm_mm->page_table_lock);
 #endif
 		__ptep_set_access_flags(vma, ptep, pte, addr, psize);
-		flush_hugetlb_page(vma, addr);
 	}
 	return changed;
 #endif

From e5f7cb58c2b77a0249c2028b6d1ec4d6d420816d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:15 +1000
Subject: [PATCH 167/251] powerpc/64s/radix: do not flush TLB when relaxing
 access

Radix flushes the TLB when updating ptes to increase permissiveness
of protection (increase access authority). Book3S does not require
TLB flushing in this case, and it is not done on hash. This patch
avoids the flush for radix.

>From Power ISA v3.0B, p.1090:

    Setting a Reference or Change Bit or Upgrading Access Authority
    (PTE Subject to Atomic Hardware Updates)

    If the only change being made to a valid PTE that is subject to
    atomic hardware updates is to set the Reference or Change bit to 1
    or to add access authorities, a simpler sequence suffices because
    the translation hardware will refetch the PTE if an access is
    attempted for which the only problems were reference and/or change
    bits needing to be set or insufficient access authority.

The nest MMU on POWER9 does not re-fetch the PTE after such an access
attempt before faulting, so address spaces with a coprocessor
attached will continue to flush in these cases.

This reduces tlbies for a kernel compile workload from 1.28M to 0.95M,
tlbiels from 20.17M 19.68M.

fork --fork --exec benchmark improved 2.77% (12000->12300).

Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-radix.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 0ddfe591cd246..d6f74cbf0fed0 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1108,7 +1108,12 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 		__radix_pte_update(ptep, 0, new_pte);
 	} else {
 		__radix_pte_update(ptep, 0, set);
-		radix__flush_tlb_page_psize(mm, address, psize);
+		/*
+		 * Book3S does not require a TLB flush when relaxing access
+		 * restrictions when the address space is not attached to a
+		 * NMMU, because the core MMU will reload the pte after taking
+		 * an access fault, which is defined by the architectue.
+		 */
 	}
 	asm volatile("ptesync" : : : "memory");
 }

From 6d8278c414cb24ac2b424f50afa99d13a49064b7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:16 +1000
Subject: [PATCH 168/251] powerpc/64s/radix: do not flush TLB on spurious fault

In the case of a spurious fault (which can happen due to a race with
another thread that changes the page table), the default Linux mm code
calls flush_tlb_page for that address. This is not required because
the pte will be re-fetched. Hash does not wire this up to a hardware
TLB flush for this reason. This patch avoids the flush for radix.

>From Power ISA v3.0B, p.1090:

    Setting a Reference or Change Bit or Upgrading Access Authority
    (PTE Subject to Atomic Hardware Updates)

    If the only change being made to a valid PTE that is subject to
    atomic hardware updates is to set the Refer- ence or Change bit to
    1 or to add access authorities, a simpler sequence suffices
    because the translation hardware will refetch the PTE if an access
    is attempted for which the only problems were reference and/or
    change bits needing to be set or insufficient access authority.

The nest MMU on POWER9 does not re-fetch the PTE after such an access
attempt before faulting, so address spaces with a coprocessor
attached will continue to flush in these cases.

This reduces tlbies for a kernel compile workload from 0.95M to 0.90M.

fork --fork --exec benchmark improved 0.5% (12300->12400).

Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 0cac17253513d..ebf572ea621ee 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -4,7 +4,7 @@
 
 #define MMU_NO_CONTEXT	~0UL
 
-
+#include <linux/mm_types.h>
 #include <asm/book3s/64/tlbflush-hash.h>
 #include <asm/book3s/64/tlbflush-radix.h>
 
@@ -137,6 +137,16 @@ static inline void flush_all_mm(struct mm_struct *mm)
 #define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
 #define flush_all_mm(mm)		local_flush_all_mm(mm)
 #endif /* CONFIG_SMP */
+
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+						unsigned long address)
+{
+	/* See ptep_set_access_flags comment */
+	if (atomic_read(&vma->vm_mm->context.copros) > 0)
+		flush_tlb_page(vma, address);
+}
+
 /*
  * flush the page walk cache for the address
  */

From f569bd94efc821f485cb4742858fdfe16f03c201 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:17 +1000
Subject: [PATCH 169/251] powerpc/64s/radix: make ptep_get_and_clear_full
 non-atomic for the full case

This matches other architectures, when we know there will be no
further accesses to the address (e.g., for teardown), page table
entries can be cleared non-atomically.

The comments about NMMU are bogus: all MMU notifiers (including NMMU)
are released at this point, with their TLBs flushed. An NMMU access at
this point would be a bug.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 62a73a7a78a40..01f6c2ca7ecd5 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -180,14 +180,8 @@ static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
 	unsigned long old_pte;
 
 	if (full) {
-		/*
-		 * If we are trying to clear the pte, we can skip
-		 * the DD1 pte update sequence and batch the tlb flush. The
-		 * tlb flush batching is done by mmu gather code. We
-		 * still keep the cmp_xchg update to make sure we get
-		 * correct R/C bit which might be updated via Nest MMU.
-		 */
-		old_pte = __radix_pte_update(ptep, ~0ul, 0);
+		old_pte = pte_val(*ptep);
+		*ptep = __pte(0);
 	} else
 		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
 

From 68662f85f3c3ea8a6fb61586a05955ab51bbb9e6 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:18 +1000
Subject: [PATCH 170/251] powerpc/64s/radix: prefetch user address in
 update_mmu_cache

Prefetch the faulting address in update_mmu_cache to give the page
table walker perhaps 100 cycles head start as locks are dropped and
the interrupt completed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mem.c              | 4 +++-
 arch/powerpc/mm/pgtable-book3s64.c | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c3c39b02b2ba7..8cecda4bd66ae 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -509,8 +509,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	 */
 	unsigned long access, trap;
 
-	if (radix_enabled())
+	if (radix_enabled()) {
+		prefetch((void *)address);
 		return;
+	}
 
 	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
 	if (!pte_young(*ptep) || address >= TASK_SIZE)
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 82fed87289de8..c1f4ca45c93a4 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -152,7 +152,8 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 			  pmd_t *pmd)
 {
-	return;
+	if (radix_enabled())
+		prefetch((void *)addr);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 

From f1cb8f9beba8699dd1b4518418191499e53f7b17 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:19 +1000
Subject: [PATCH 171/251] powerpc/64s/radix: avoid ptesync after set_pte and
 ptep_set_access_flags

The ISA suggests ptesync after setting a pte, to prevent a table walk
initiated by a subsequent access from missing that store and causing a
spurious fault. This is an architectual allowance that allows an
implementation's page table walker to be incoherent with the store
queue.

However there is no correctness problem in taking a spurious fault in
userspace -- the kernel copes with these at any time, so the updated
pte will be found eventually. Spurious kernel faults on vmap memory
must be avoided, so a ptesync is put into flush_cache_vmap.

On POWER9 so far I have not found a measurable window where this can
result in more minor faults, so as an optimisation, remove the costly
ptesync from pte updates. If an implementation benefits from ptesync,
it would be better to add it back in update_mmu_cache, so it's not
done for things like fork(2).

fork --fork --exec benchmark improved 5.2% (12400->13100).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 19 ++++++++++++++++++-
 arch/powerpc/include/asm/cacheflush.h      | 13 +++++++++++++
 arch/powerpc/mm/pgtable-radix.c            |  2 +-
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 01f6c2ca7ecd5..9c567d243f619 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -202,7 +202,24 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte, int percpu)
 {
 	*ptep = pte;
-	asm volatile("ptesync" : : : "memory");
+
+	/*
+	 * The architecture suggests a ptesync after setting the pte, which
+	 * orders the store that updates the pte with subsequent page table
+	 * walk accesses which may load the pte. Without this it may be
+	 * possible for a subsequent access to result in spurious fault.
+	 *
+	 * This is not necessary for correctness, because a spurious fault
+	 * is tolerated by the page fault handler, and this store will
+	 * eventually be seen. In testing, there was no noticable increase
+	 * in user faults on POWER9. Avoiding ptesync here is a significant
+	 * win for things like fork. If a future microarchitecture benefits
+	 * from ptesync, it should probably go into update_mmu_cache, rather
+	 * than set_pte_at (which is used to set ptes unrelated to faults).
+	 *
+	 * Spurious faults to vmalloc region are not tolerated, so there is
+	 * a ptesync in flush_cache_vmap.
+	 */
 }
 
 static inline int radix__pmd_bad(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 11843e37d9cf9..e9662648e72d7 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -26,6 +26,19 @@
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+#ifdef CONFIG_BOOK3S_64
+/*
+ * Book3s has no ptesync after setting a pte, so without this ptesync it's
+ * possible for a kernel virtual mapping access to return a spurious fault
+ * if it's accessed right after the pte is set. The page fault handler does
+ * not expect this type of fault. flush_cache_vmap is not exactly the right
+ * place to put this, but it seems to work well enough.
+ */
+#define flush_cache_vmap(start, end)		do { asm volatile("ptesync"); } while (0)
+#else
+#define flush_cache_vmap(start, end)		do { } while (0)
+#endif
+
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index d6f74cbf0fed0..96f68c5aa1f5b 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1115,5 +1115,5 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 		 * an access fault, which is defined by the architectue.
 		 */
 	}
-	asm volatile("ptesync" : : : "memory");
+	/* See ptesync comment in radix__set_pte_at */
 }

From 85bcfaf69cbd610fdfac3351cf385809a2f4a93b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:20 +1000
Subject: [PATCH 172/251] powerpc/64s/radix: optimise pte_update

Implementing pte_update with pte_xchg (which uses cmpxchg) is
inefficient. A single larx/stcx. works fine, no need for the less
efficient cmpxchg sequence.

Then remove the memory barriers from the operation. There is a
requirement for TLB flushing to load mm_cpumask after the store
that reduces pte permissions, which is moved into the TLB flush
code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 25 +++++++++++-----------
 arch/powerpc/mm/mmu_context.c              |  6 ++++--
 arch/powerpc/mm/tlb-radix.c                | 11 +++++++++-
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 9c567d243f619..ef9f96742ce16 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -131,20 +131,21 @@ extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
-	pte_t pte;
-	unsigned long old_pte, new_pte;
-
-	do {
-		pte = READ_ONCE(*ptep);
-		old_pte = pte_val(pte);
-		new_pte = (old_pte | set) & ~clr;
-
-	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
-
-	return old_pte;
+	__be64 old_be, tmp_be;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n"
+	"	andc	%1,%0,%5	\n"
+	"	or	%1,%1,%4	\n"
+	"	stdcx.	%1,0,%3		\n"
+	"	bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+	: "cc" );
+
+	return be64_to_cpu(old_be);
 }
 
-
 static inline unsigned long radix__pte_update(struct mm_struct *mm,
 					unsigned long addr,
 					pte_t *ptep, unsigned long clr,
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0ab297c4cfad1..f84e14f23e50a 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -57,8 +57,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
 		 * copy_mm_to_paca().
 		 *
-		 * On the read side the barrier is in pte_xchg(), which orders
-		 * the store to the PTE vs the load of mm_cpumask.
+		 * On the other side, the barrier is in mm/tlb-radix.c for
+		 * radix which orders earlier stores to clear the PTEs vs
+		 * the load of mm_cpumask. And pte_xchg which does the same
+		 * thing for hash.
 		 *
 		 * This full barrier is needed by membarrier when switching
 		 * between processes after store to rq->curr, before user-space
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 5ac3206c51ccf..cdc50398fd600 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -524,6 +524,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask vs previous stores to clear ptes before
+	 * the invalidate. See barrier in switch_mm_irqs_off
+	 */
+	smp_mb();
 	if (!mm_is_thread_local(mm)) {
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
@@ -544,6 +549,7 @@ void radix__flush_all_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
 	else
@@ -568,6 +574,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 	else
@@ -630,6 +637,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -791,6 +799,7 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -849,7 +858,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
-
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {

From 0cef77c7798a7832769fbd25a4d0b0b3361cc6f0 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 1 Jun 2018 20:01:21 +1000
Subject: [PATCH 173/251] powerpc/64s/radix: flush remote CPUs out of
 single-threaded mm_cpumask

When a single-threaded process has a non-local mm_cpumask, try to use
that point to flush the TLBs out of other CPUs in the cpumask.

An IPI is used for clearing remote CPUs for a few reasons:
- An IPI can end lazy TLB use of the mm, which is required to prevent
  TLB entries being created on the remote CPU. The alternative is to
  drop lazy TLB switching completely, which costs 7.5% in a context
  switch ping-pong test betwee a process and kernel idle thread.
- An IPI can have remote CPUs flush the entire PID, but the local CPU
  can flush a specific VA. tlbie would require over-flushing of the
  local CPU (where the process is running).
- A single threaded process that is migrated to a different CPU is
  likely to have a relatively small mm_cpumask, so IPI is reasonable.

No other thread can concurrently switch to this mm, because it must
have been given a reference to mm_users by the current thread before it
can use_mm. mm_users can be asynchronously incremented (by
mm_activate or mmget_not_zero), but those users must use remote mm
access and can't use_mm or access user address space. Existing code
makes the this assumption already, for example sparc64 has reset
mm_cpumask using this condition since the start of history, see
arch/sparc/kernel/smp_64.c.

This reduces tlbies for a kernel compile workload from 0.90M to 0.12M,
tlbiels are increased significantly due to the PID flushing for the
cleaning up remote CPUs, and increased local flushes (PID flushes take
128 tlbiels vs 1 tlbie).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/tlb.h |  13 +++
 arch/powerpc/mm/tlb-radix.c    | 148 +++++++++++++++++++++++++++------
 2 files changed, 134 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index a7eabff27a0fa..9138baccebb0a 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -76,6 +76,19 @@ static inline int mm_is_thread_local(struct mm_struct *mm)
 		return false;
 	return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
 }
+static inline void mm_reset_thread_local(struct mm_struct *mm)
+{
+	WARN_ON(atomic_read(&mm->context.copros) > 0);
+	/*
+	 * It's possible for mm_access to take a reference on mm_users to
+	 * access the remote mm from another thread, but it's not allowed
+	 * to set mm_cpumask, so mm_users may be > 1 here.
+	 */
+	WARN_ON(current->mm != mm);
+	atomic_set(&mm->context.active_cpus, 1);
+	cpumask_clear(mm_cpumask(mm));
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+}
 #else /* CONFIG_PPC_BOOK3S_64 */
 static inline int mm_is_thread_local(struct mm_struct *mm)
 {
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index cdc50398fd600..67a6e86d3e7ef 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,6 +12,8 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
+#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
 
 #include <asm/ppc-opcode.h>
 #include <asm/tlb.h>
@@ -504,6 +506,15 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_page);
 
+static bool mm_is_singlethreaded(struct mm_struct *mm)
+{
+	if (atomic_read(&mm->context.copros) > 0)
+		return false;
+	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
+		return true;
+	return false;
+}
+
 static bool mm_needs_flush_escalation(struct mm_struct *mm)
 {
 	/*
@@ -511,10 +522,47 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
 	 * caching PTEs and not flushing them properly when
 	 * RIC = 0 for a PID/LPID invalidate
 	 */
-	return atomic_read(&mm->context.copros) != 0;
+	if (atomic_read(&mm->context.copros) > 0)
+		return true;
+	return false;
 }
 
 #ifdef CONFIG_SMP
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+	struct mm_struct *mm = arg;
+	unsigned long pid = mm->context.id;
+
+	if (current->mm == mm)
+		return; /* Local CPU */
+
+	if (current->active_mm == mm) {
+		/*
+		 * Must be a kernel thread because sender is single-threaded.
+		 */
+		BUG_ON(current->mm);
+		mmgrab(&init_mm);
+		switch_mm(mm, &init_mm, current);
+		current->active_mm = &init_mm;
+		mmdrop(mm);
+	}
+	_tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+
+static void exit_flush_lazy_tlbs(struct mm_struct *mm)
+{
+	/*
+	 * Would be nice if this was async so it could be run in
+	 * parallel with our local flush, but generic code does not
+	 * give a good API for it. Could extend the generic code or
+	 * make a special powerpc IPI for flushing TLBs.
+	 * For now it's not too performance critical.
+	 */
+	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
+				(void *)mm, 1);
+	mm_reset_thread_local(mm);
+}
+
 void radix__flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
@@ -530,17 +578,24 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 	 */
 	smp_mb();
 	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			exit_flush_lazy_tlbs(mm);
+			goto local;
+		}
+
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
 		else
 			_tlbie_pid(pid, RIC_FLUSH_TLB);
-	} else
+	} else {
+local:
 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
+	}
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__flush_tlb_mm);
 
-void radix__flush_all_mm(struct mm_struct *mm)
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 {
 	unsigned long pid;
 
@@ -550,12 +605,24 @@ void radix__flush_all_mm(struct mm_struct *mm)
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm))
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			if (!fullmm) {
+				exit_flush_lazy_tlbs(mm);
+				goto local;
+			}
+		}
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
-	else
+	} else {
+local:
 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+	}
 	preempt_enable();
 }
+void radix__flush_all_mm(struct mm_struct *mm)
+{
+	__flush_all_mm(mm, false);
+}
 EXPORT_SYMBOL(radix__flush_all_mm);
 
 void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
@@ -575,10 +642,16 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm))
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			exit_flush_lazy_tlbs(mm);
+			goto local;
+		}
 		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
-	else
+	} else {
+local:
 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+	}
 	preempt_enable();
 }
 
@@ -638,14 +711,21 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (mm_is_thread_local(mm)) {
-		local = true;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_local_single_page_flush_ceiling);
-	} else {
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			if (end != TLB_FLUSH_ALL) {
+				exit_flush_lazy_tlbs(mm);
+				goto is_local;
+			}
+		}
 		local = false;
 		full = (end == TLB_FLUSH_ALL ||
 				nr_pages > tlb_single_page_flush_ceiling);
+	} else {
+is_local:
+		local = true;
+		full = (end == TLB_FLUSH_ALL ||
+				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
 	if (full) {
@@ -766,7 +846,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 * See the comment for radix in arch_exit_mmap().
 	 */
 	if (tlb->fullmm) {
-		radix__flush_all_mm(mm);
+		__flush_all_mm(mm, true);
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_mm(mm);
@@ -800,24 +880,32 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (mm_is_thread_local(mm)) {
-		local = true;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_local_single_page_flush_ceiling);
-	} else {
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			if (end != TLB_FLUSH_ALL) {
+				exit_flush_lazy_tlbs(mm);
+				goto is_local;
+			}
+		}
 		local = false;
 		full = (end == TLB_FLUSH_ALL ||
 				nr_pages > tlb_single_page_flush_ceiling);
+	} else {
+is_local:
+		local = true;
+		full = (end == TLB_FLUSH_ALL ||
+				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
 	if (full) {
-		if (!local && mm_needs_flush_escalation(mm))
-			also_pwc = true;
-
-		if (local)
+		if (local) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
-		else
-			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB);
+		} else {
+			if (mm_needs_flush_escalation(mm))
+				also_pwc = true;
+
+			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+		}
 	} else {
 		if (local)
 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
@@ -859,10 +947,16 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (mm_is_thread_local(mm)) {
-		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
-	} else {
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			exit_flush_lazy_tlbs(mm);
+			goto local;
+		}
 		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		goto local;
+	} else {
+local:
+		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	}
 
 	preempt_enable();

From 926bc2f100c24d4842b3064b5af44ae964c1d81c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 20:31:22 +1000
Subject: [PATCH 174/251] powerpc/64s: Fix compiler store ordering to SLB
 shadow area

The stores to update the SLB shadow area must be made as they appear
in the C code, so that the hypervisor does not see an entry with
mismatched vsid and esid. Use WRITE_ONCE for this.

GCC has been observed to elide the first store to esid in the update,
which means that if the hypervisor interrupts the guest after storing
to vsid, it could see an entry with old esid and new vsid, which may
possibly result in memory corruption.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 66577cc66dc9f..2f4b33b24b3b9 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -63,14 +63,14 @@ static inline void slb_shadow_update(unsigned long ea, int ssize,
 	 * updating it.  No write barriers are needed here, provided
 	 * we only update the current CPU's SLB shadow buffer.
 	 */
-	p->save_area[index].esid = 0;
-	p->save_area[index].vsid = cpu_to_be64(mk_vsid_data(ea, ssize, flags));
-	p->save_area[index].esid = cpu_to_be64(mk_esid_data(ea, ssize, index));
+	WRITE_ONCE(p->save_area[index].esid, 0);
+	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
+	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
 }
 
 static inline void slb_shadow_clear(enum slb_index index)
 {
-	get_slb_shadow()->save_area[index].esid = 0;
+	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, 0);
 }
 
 static inline void create_shadowed_slbe(unsigned long ea, int ssize,

From 91d06971881f71d945910de128658038513d1b24 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Wed, 30 May 2018 18:48:04 +0530
Subject: [PATCH 175/251] powerpc/mm/hash: Add missing isync prior to kernel
 stack SLB switch

Currently we do not have an isync, or any other context synchronizing
instruction prior to the slbie/slbmte in _switch() that updates the
SLB entry for the kernel stack.

However that is not correct as outlined in the ISA.

From Power ISA Version 3.0B, Book III, Chapter 11, page 1133:

  "Changing the contents of ... the contents of SLB entries ... can
   have the side effect of altering the context in which data
   addresses and instruction addresses are interpreted, and in which
   instructions are executed and data accesses are performed.
   ...
   These side effects need not occur in program order, and therefore
   may require explicit synchronization by software.
   ...
   The synchronizing instruction before the context-altering
   instruction ensures that all instructions up to and including that
   synchronizing instruction are fetched and executed in the context
   that existed before the alteration."

And page 1136:

  "For data accesses, the context synchronizing instruction before the
   slbie, slbieg, slbia, slbmte, tlbie, or tlbiel instruction ensures
   that all preceding instructions that access data storage have
   completed to a point at which they have reported all exceptions
   they will cause."

We're not aware of any bugs caused by this, but it should be fixed
regardless.

Add the missing isync when updating kernel stack SLB entry.

Cc: stable@vger.kernel.org
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
[mpe: Flesh out change log with more ISA text & explanation]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_64.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 51695608c68b3..3d1af55e09dc5 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -596,6 +596,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	 * actually hit this code path.
 	 */
 
+	isync
 	slbie	r6
 	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
 	slbmte	r7,r0

From ed515b6898c36775ddd99ff9ffeda4e83917acda Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Fri, 1 Jun 2018 13:54:24 +0530
Subject: [PATCH 176/251] powerpc/mm/hugetlb: Update hugetlb related locks

With split pmd page table lock enabled, we don't use mm->page_table_lock when
updating pmd entries. This patch update hugetlb path to use the right lock
when inserting huge page directory entries into page table.

ex: if we are using hugepd and inserting hugepd entry at the pmd level, we
use pmd_lockptr, which based on config can be split pmd lock.

For update huge page directory entries itself we use mm->page_table_lock. We
do have a helper huge_pte_lockptr() for that.

Fixes: 675d99529 ("powerpc/book3s64: Enable split pmd ptlock")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/hugetlbpage.c | 33 +++++++++++++++++++++++----------
 arch/powerpc/mm/pgtable.c     | 12 +++++++-----
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 2a4b1bf8bde6c..7c5f479c5c00f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -52,7 +52,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long s
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address, unsigned pdshift, unsigned pshift)
+			   unsigned long address, unsigned int pdshift,
+			   unsigned int pshift, spinlock_t *ptl)
 {
 	struct kmem_cache *cachep;
 	pte_t *new;
@@ -82,8 +83,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	 */
 	smp_wmb();
 
-	spin_lock(&mm->page_table_lock);
-
+	spin_lock(ptl);
 	/*
 	 * We have multiple higher-level entries that point to the same
 	 * actual pte location.  Fill in each as we go and backtrack on error.
@@ -113,7 +113,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			*hpdp = __hugepd(0);
 		kmem_cache_free(cachep, new);
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	return 0;
 }
 
@@ -138,6 +138,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	hugepd_t *hpdp = NULL;
 	unsigned pshift = __ffs(sz);
 	unsigned pdshift = PGDIR_SHIFT;
+	spinlock_t *ptl;
 
 	addr &= ~(sz-1);
 	pg = pgd_offset(mm, addr);
@@ -146,39 +147,46 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (pshift == PGDIR_SHIFT)
 		/* 16GB huge page */
 		return (pte_t *) pg;
-	else if (pshift > PUD_SHIFT)
+	else if (pshift > PUD_SHIFT) {
 		/*
 		 * We need to use hugepd table
 		 */
+		ptl = &mm->page_table_lock;
 		hpdp = (hugepd_t *)pg;
-	else {
+	} else {
 		pdshift = PUD_SHIFT;
 		pu = pud_alloc(mm, pg, addr);
 		if (pshift == PUD_SHIFT)
 			return (pte_t *)pu;
-		else if (pshift > PMD_SHIFT)
+		else if (pshift > PMD_SHIFT) {
+			ptl = pud_lockptr(mm, pu);
 			hpdp = (hugepd_t *)pu;
-		else {
+		} else {
 			pdshift = PMD_SHIFT;
 			pm = pmd_alloc(mm, pu, addr);
 			if (pshift == PMD_SHIFT)
 				/* 16MB hugepage */
 				return (pte_t *)pm;
-			else
+			else {
+				ptl = pmd_lockptr(mm, pm);
 				hpdp = (hugepd_t *)pm;
+			}
 		}
 	}
 #else
 	if (pshift >= HUGEPD_PGD_SHIFT) {
+		ptl = &mm->page_table_lock;
 		hpdp = (hugepd_t *)pg;
 	} else {
 		pdshift = PUD_SHIFT;
 		pu = pud_alloc(mm, pg, addr);
 		if (pshift >= HUGEPD_PUD_SHIFT) {
+			ptl = pud_lockptr(mm, pu);
 			hpdp = (hugepd_t *)pu;
 		} else {
 			pdshift = PMD_SHIFT;
 			pm = pmd_alloc(mm, pu, addr);
+			ptl = pmd_lockptr(mm, pm);
 			hpdp = (hugepd_t *)pm;
 		}
 	}
@@ -188,7 +196,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 
 	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
 
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
+						  pdshift, pshift, ptl))
 		return NULL;
 
 	return hugepte_offset(*hpdp, addr, pdshift);
@@ -499,6 +508,10 @@ struct page *follow_huge_pd(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 
 retry:
+	/*
+	 * hugepage directory entries are protected by mm->page_table_lock
+	 * Use this instead of huge_pte_lockptr
+	 */
 	ptl = &mm->page_table_lock;
 	spin_lock(ptl);
 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 5281c2c064af2..d71c7777669cd 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -249,17 +249,19 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	if (changed) {
 
 #ifdef CONFIG_PPC_BOOK3S_64
-		struct hstate *hstate = hstate_file(vma->vm_file);
-		psize = hstate_get_psize(hstate);
+		struct hstate *h = hstate_vma(vma);
+
+		psize = hstate_get_psize(h);
+#ifdef CONFIG_DEBUG_VM
+		assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep));
+#endif
+
 #else
 		/*
 		 * Not used on non book3s64 platforms. But 8xx
 		 * can possibly use tsize derived from hstate.
 		 */
 		psize = 0;
-#endif
-#ifdef CONFIG_DEBUG_VM
-		assert_spin_locked(&vma->vm_mm->page_table_lock);
 #endif
 		__ptep_set_access_flags(vma, ptep, pte, addr, psize);
 	}

From a5db5060e0b2e27605df272224bfd470f644d8a5 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Fri, 1 Jun 2018 13:54:02 +0530
Subject: [PATCH 177/251] powerpc/mm/hash: hard disable irq in the SLB insert
 path

When inserting SLB entries for EA above 512TB, we need to hard disable irq.
This will make sure we don't take a PMU interrupt that can possibly touch
user space address via a stack dump. To prevent this, we need to hard disable
the interrupt.

Also add a comment explaining why we don't need context synchronizing isync
with slbmte.

Fixes: f384796c4 ("powerpc/mm: Add support for handling > 512TB address in SLB miss")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slb.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 2f4b33b24b3b9..cb796724a6fc7 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -352,6 +352,14 @@ static void insert_slb_entry(unsigned long vsid, unsigned long ea,
 	/*
 	 * We are irq disabled, hence should be safe to access PACA.
 	 */
+	VM_WARN_ON(!irqs_disabled());
+
+	/*
+	 * We can't take a PMU exception in the following code, so hard
+	 * disable interrupts.
+	 */
+	hard_irq_disable();
+
 	index = get_paca()->stab_rr;
 
 	/*
@@ -369,6 +377,11 @@ static void insert_slb_entry(unsigned long vsid, unsigned long ea,
 		    ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
 	esid_data = mk_esid_data(ea, ssize, index);
 
+	/*
+	 * No need for an isync before or after this slbmte. The exception
+	 * we enter with and the rfid we exit with are context synchronizing.
+	 * Also we only handle user segments here.
+	 */
 	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
 		     : "memory");
 

From eff06ef0891d200eb0ddd156c6e96ce3dd18edc0 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 6 Dec 2017 12:03:52 +0100
Subject: [PATCH 178/251] powerpc/pasemi: Set PCI_SCAN_ALL_PCI_DEVS

Needed on Amiga X1000 with SB600.

Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pasemi/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index 5ff6108f19e98..aea9ff2c8e6da 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -224,6 +224,8 @@ void __init pas_pci_init(void)
 		return;
 	}
 
+	pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS);
+
 	for (np = NULL; (np = of_get_next_child(root, np)) != NULL;)
 		if (np->name && !strcmp(np->name, "pxp") && !pas_add_bridge(np))
 			of_node_get(np);

From 6e8cef384a41882b2d4ec6992dd0d74246b3f353 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Apr 2018 10:36:38 +0200
Subject: [PATCH 179/251] powerpc: always enable RTC_LIB

In order to use the rtc_tm_to_time64() and rtc_time64_to_tm()
helper functions in later patches, we have to ensure that
CONFIG_RTC_LIB is always built-in.

Note that this symbol only controls a couple of helper functions,
not the actual RTC subsystem, which remains optional and is
enabled with CONFIG_RTC_CLASS.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 23247fa551e72..940c955d1c9c7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -234,6 +234,7 @@ config PPC
 	select OF_RESERVED_MEM
 	select OLD_SIGACTION			if PPC32
 	select OLD_SIGSUSPEND
+	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select VIRT_TO_BUS			if !PPC64

From 2dc20f454dcf82c52ed41362ce0b3140ce8ad4be Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Apr 2018 10:36:39 +0200
Subject: [PATCH 180/251] powerpc: rtas: clean up time handling

The to_tm() helper function operates on a signed integer for the time,
so it will suffer from overflow in 2038, even on 64-bit kernels.

Rather than fix that function, this replaces its use in the rtas
procfs implementation with the standard rtc_time64_to_tm() helper
that is very similar but is not affected by the overflow.

In order to actually support long times, the parser function gets
changed to 64-bit user input and output as well. Note that the tm_mon
and tm_year representation is slightly different, so we have to manually
add an offset here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/rtas-proc.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 11ef978e95dbf..5f13c8358aeb9 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -280,7 +280,7 @@ static int __init proc_rtas_init(void)
 
 __initcall(proc_rtas_init);
 
-static int parse_number(const char __user *p, size_t count, unsigned long *val)
+static int parse_number(const char __user *p, size_t count, u64 *val)
 {
 	char buf[40];
 	char *end;
@@ -293,7 +293,7 @@ static int parse_number(const char __user *p, size_t count, unsigned long *val)
 
 	buf[count] = 0;
 
-	*val = simple_strtoul(buf, &end, 10);
+	*val = simple_strtoull(buf, &end, 10);
 	if (*end && *end != '\n')
 		return -EINVAL;
 
@@ -307,17 +307,17 @@ static ssize_t ppc_rtas_poweron_write(struct file *file,
 		const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct rtc_time tm;
-	unsigned long nowtime;
+	time64_t nowtime;
 	int error = parse_number(buf, count, &nowtime);
 	if (error)
 		return error;
 
 	power_on_time = nowtime; /* save the time */
 
-	to_tm(nowtime, &tm);
+	rtc_time64_to_tm(nowtime, &tm);
 
 	error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, 
-			tm.tm_year, tm.tm_mon, tm.tm_mday, 
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 			tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
 	if (error)
 		printk(KERN_WARNING "error: setting poweron time returned: %s\n", 
@@ -373,14 +373,14 @@ static ssize_t ppc_rtas_clock_write(struct file *file,
 		const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct rtc_time tm;
-	unsigned long nowtime;
+	time64_t nowtime;
 	int error = parse_number(buf, count, &nowtime);
 	if (error)
 		return error;
 
-	to_tm(nowtime, &tm);
+	rtc_time64_to_tm(nowtime, &tm);
 	error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, 
-			tm.tm_year, tm.tm_mon, tm.tm_mday, 
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 			tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
 	if (error)
 		printk(KERN_WARNING "error: setting the clock returned: %s\n", 
@@ -401,8 +401,8 @@ static int ppc_rtas_clock_show(struct seq_file *m, void *v)
 		unsigned int year, mon, day, hour, min, sec;
 		year = ret[0]; mon  = ret[1]; day  = ret[2];
 		hour = ret[3]; min  = ret[4]; sec  = ret[5];
-		seq_printf(m, "%lu\n",
-				mktime(year, mon, day, hour, min, sec));
+		seq_printf(m, "%lld\n",
+				mktime64(year, mon, day, hour, min, sec));
 	}
 	return 0;
 }
@@ -731,7 +731,7 @@ static void get_location_code(struct seq_file *m, struct individual_sensor *s,
 static ssize_t ppc_rtas_tone_freq_write(struct file *file,
 		const char __user *buf, size_t count, loff_t *ppos)
 {
-	unsigned long freq;
+	u64 freq;
 	int error = parse_number(buf, count, &freq);
 	if (error)
 		return error;
@@ -756,7 +756,7 @@ static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v)
 static ssize_t ppc_rtas_tone_volume_write(struct file *file,
 		const char __user *buf, size_t count, loff_t *ppos)
 {
-	unsigned long volume;
+	u64 volume;
 	int error = parse_number(buf, count, &volume);
 	if (error)
 		return error;

From 5bfd643583b2e2a203163fd6b617cd9027054200 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Apr 2018 10:36:40 +0200
Subject: [PATCH 181/251] powerpc: use time64_t in read_persistent_clock

Looking through the remaining users of the deprecated mktime()
function, I found the powerpc rtc handlers, which use it in
place of rtc_tm_to_time64().

To clean this up, I'm changing over the read_persistent_clock()
function to the read_persistent_clock64() variant, and change
all the platform specific handlers along with it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/machdep.h        |  2 +-
 arch/powerpc/include/asm/opal.h           |  2 +-
 arch/powerpc/include/asm/rtas.h           |  2 +-
 arch/powerpc/kernel/rtas-rtc.c            |  4 +--
 arch/powerpc/kernel/time.c                |  7 +++--
 arch/powerpc/platforms/8xx/m8xx_setup.c   |  4 +--
 arch/powerpc/platforms/maple/maple.h      |  2 +-
 arch/powerpc/platforms/maple/time.c       |  5 ++--
 arch/powerpc/platforms/pasemi/pasemi.h    |  2 +-
 arch/powerpc/platforms/pasemi/time.c      |  4 +--
 arch/powerpc/platforms/powermac/pmac.h    |  2 +-
 arch/powerpc/platforms/powermac/time.c    | 31 ++++++++---------------
 arch/powerpc/platforms/powernv/opal-rtc.c |  5 ++--
 arch/powerpc/platforms/ps3/platform.h     |  2 +-
 arch/powerpc/platforms/ps3/time.c         |  2 +-
 15 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index ffe7c71e1132e..a47de82fb8e27 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -83,7 +83,7 @@ struct machdep_calls {
 
 	int		(*set_rtc_time)(struct rtc_time *);
 	void		(*get_rtc_time)(struct rtc_time *);
-	unsigned long	(*get_boot_time)(void);
+	time64_t	(*get_boot_time)(void);
 	unsigned char 	(*rtc_read_val)(int addr);
 	void		(*rtc_write_val)(int addr, unsigned char val);
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1dbeb6cd68faa..e1b2910c6e818 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -328,7 +328,7 @@ extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
 extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data);
 
 struct rtc_time;
-extern unsigned long opal_get_boot_time(void);
+extern time64_t opal_get_boot_time(void);
 extern void opal_nvram_init(void);
 extern void opal_flash_update_init(void);
 extern void opal_flash_update_print_message(void);
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index ec9dd79398ee9..71e393c46a490 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -361,7 +361,7 @@ extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
 extern int rtas_ibm_suspend_me(u64 handle);
 
 struct rtc_time;
-extern unsigned long rtas_get_boot_time(void);
+extern time64_t rtas_get_boot_time(void);
 extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
 extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
 
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
index 49600985c7ef0..a28239b8b0c0b 100644
--- a/arch/powerpc/kernel/rtas-rtc.c
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -13,7 +13,7 @@
 
 #define MAX_RTC_WAIT 5000	/* 5 sec */
 #define RTAS_CLOCK_BUSY (-2)
-unsigned long __init rtas_get_boot_time(void)
+time64_t __init rtas_get_boot_time(void)
 {
 	int ret[8];
 	int error;
@@ -38,7 +38,7 @@ unsigned long __init rtas_get_boot_time(void)
 		return 0;
 	}
 
-	return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
+	return mktime64(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
 }
 
 /* NOTE: get_rtc_time will get an error if executed in interrupt context
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ed6b2abdde15f..da06a4d84ae23 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -825,7 +825,7 @@ int update_persistent_clock(struct timespec now)
 	return ppc_md.set_rtc_time(&tm);
 }
 
-static void __read_persistent_clock(struct timespec *ts)
+static void __read_persistent_clock(struct timespec64 *ts)
 {
 	struct rtc_time tm;
 	static int first = 1;
@@ -849,11 +849,10 @@ static void __read_persistent_clock(struct timespec *ts)
 	}
 	ppc_md.get_rtc_time(&tm);
 
-	ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
-			    tm.tm_hour, tm.tm_min, tm.tm_sec);
+	ts->tv_sec = rtc_tm_to_time64(&tm);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	__read_persistent_clock(ts);
 
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 2188d691a40f1..d76daa90647b6 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -192,9 +192,7 @@ void mpc8xx_get_rtc_time(struct rtc_time *tm)
 
 	/* Get time from the RTC. */
 	data = in_be32(&sys_tmr->sit_rtc);
-	to_tm(data, tm);
-	tm->tm_year -= 1900;
-	tm->tm_mon -= 1;
+	rtc_time64_to_tm(data, tm);
 	immr_unmap(sys_tmr);
 	return;
 }
diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h
index d10f4af3a42e4..4f358b55c3413 100644
--- a/arch/powerpc/platforms/maple/maple.h
+++ b/arch/powerpc/platforms/maple/maple.h
@@ -6,7 +6,7 @@
  */
 extern int maple_set_rtc_time(struct rtc_time *tm);
 extern void maple_get_rtc_time(struct rtc_time *tm);
-extern unsigned long maple_get_boot_time(void);
+extern time64_t maple_get_boot_time(void);
 extern void maple_calibrate_decr(void);
 extern void maple_pci_init(void);
 extern void maple_pci_irq_fixup(struct pci_dev *dev);
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index cfddc87f81bff..becf2ebf7df57 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -137,7 +137,7 @@ static struct resource rtc_iores = {
 	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
 };
 
-unsigned long __init maple_get_boot_time(void)
+time64_t __init maple_get_boot_time(void)
 {
 	struct rtc_time tm;
 	struct device_node *rtcs;
@@ -170,7 +170,6 @@ unsigned long __init maple_get_boot_time(void)
 	request_resource(&ioport_resource, &rtc_iores);
 
 	maple_get_rtc_time(&tm);
-	return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
-		      tm.tm_hour, tm.tm_min, tm.tm_sec);
+	return rtc_tm_to_time64(&tm);
 }
 
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
index 329d2a6192549..70b56048ed1be 100644
--- a/arch/powerpc/platforms/pasemi/pasemi.h
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -2,7 +2,7 @@
 #ifndef _PASEMI_PASEMI_H
 #define _PASEMI_PASEMI_H
 
-extern unsigned long pas_get_boot_time(void);
+extern time64_t pas_get_boot_time(void);
 extern void pas_pci_init(void);
 extern void pas_pci_irq_fixup(struct pci_dev *dev);
 extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
index fa54351ac268d..ea815254ee7b7 100644
--- a/arch/powerpc/platforms/pasemi/time.c
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -21,8 +21,8 @@
 
 #include <asm/time.h>
 
-unsigned long __init pas_get_boot_time(void)
+time64_t __init pas_get_boot_time(void)
 {
 	/* Let's just return a fake date right now */
-	return mktime(2006, 1, 1, 12, 0, 0);
+	return mktime64(2006, 1, 1, 12, 0, 0);
 }
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 6f15b8804e9b5..16a52afdb76e8 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -15,7 +15,7 @@ struct rtc_time;
 extern int pmac_newworld;
 
 extern long pmac_time_init(void);
-extern unsigned long pmac_get_boot_time(void);
+extern time64_t pmac_get_boot_time(void);
 extern void pmac_get_rtc_time(struct rtc_time *);
 extern int pmac_set_rtc_time(struct rtc_time *);
 extern void pmac_read_rtc_time(void);
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 274af6fa388ef..d5d1c452038e5 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -84,15 +84,6 @@ long __init pmac_time_init(void)
 	return delta;
 }
 
-#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU)
-static void to_rtc_time(unsigned long now, struct rtc_time *tm)
-{
-	to_tm(now, tm);
-	tm->tm_year -= 1900;
-	tm->tm_mon -= 1;
-}
-#endif
-
 #if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) || \
     defined(CONFIG_PMAC_SMU)
 static unsigned long from_rtc_time(struct rtc_time *tm)
@@ -103,10 +94,10 @@ static unsigned long from_rtc_time(struct rtc_time *tm)
 #endif
 
 #ifdef CONFIG_ADB_CUDA
-static unsigned long cuda_get_time(void)
+static time64_t cuda_get_time(void)
 {
 	struct adb_request req;
-	unsigned int now;
+	time64_t now;
 
 	if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
 		return 0;
@@ -117,10 +108,10 @@ static unsigned long cuda_get_time(void)
 		       req.reply_len);
 	now = (req.reply[3] << 24) + (req.reply[4] << 16)
 		+ (req.reply[5] << 8) + req.reply[6];
-	return ((unsigned long)now) - RTC_OFFSET;
+	return now - RTC_OFFSET;
 }
 
-#define cuda_get_rtc_time(tm)	to_rtc_time(cuda_get_time(), (tm))
+#define cuda_get_rtc_time(tm)	rtc_time64_to_tm(cuda_get_time(), (tm))
 
 static int cuda_set_rtc_time(struct rtc_time *tm)
 {
@@ -147,10 +138,10 @@ static int cuda_set_rtc_time(struct rtc_time *tm)
 #endif
 
 #ifdef CONFIG_ADB_PMU
-static unsigned long pmu_get_time(void)
+static time64_t pmu_get_time(void)
 {
 	struct adb_request req;
-	unsigned int now;
+	time64_t now;
 
 	if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
 		return 0;
@@ -160,10 +151,10 @@ static unsigned long pmu_get_time(void)
 		       req.reply_len);
 	now = (req.reply[0] << 24) + (req.reply[1] << 16)
 		+ (req.reply[2] << 8) + req.reply[3];
-	return ((unsigned long)now) - RTC_OFFSET;
+	return now - RTC_OFFSET;
 }
 
-#define pmu_get_rtc_time(tm)	to_rtc_time(pmu_get_time(), (tm))
+#define pmu_get_rtc_time(tm)	rtc_time64_to_tm(pmu_get_time(), (tm))
 
 static int pmu_set_rtc_time(struct rtc_time *tm)
 {
@@ -188,13 +179,13 @@ static int pmu_set_rtc_time(struct rtc_time *tm)
 #endif
 
 #ifdef CONFIG_PMAC_SMU
-static unsigned long smu_get_time(void)
+static time64_t smu_get_time(void)
 {
 	struct rtc_time tm;
 
 	if (smu_get_rtc_time(&tm, 1))
 		return 0;
-	return from_rtc_time(&tm);
+	return rtc_tm_to_time64(&tm);
 }
 
 #else
@@ -204,7 +195,7 @@ static unsigned long smu_get_time(void)
 #endif
 
 /* Can't be __init, it's called when suspending and resuming */
-unsigned long pmac_get_boot_time(void)
+time64_t pmac_get_boot_time(void)
 {
 	/* Get the time from the RTC, used only at boot time */
 	switch (sys_ctrler) {
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index aa2a5139462ea..42ec642a3eba5 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -34,7 +34,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
 	tm->tm_wday     = -1;
 }
 
-unsigned long __init opal_get_boot_time(void)
+time64_t __init opal_get_boot_time(void)
 {
 	struct rtc_time tm;
 	u32 y_m_d;
@@ -61,8 +61,7 @@ unsigned long __init opal_get_boot_time(void)
 	y_m_d = be32_to_cpu(__y_m_d);
 	h_m_s_ms = be64_to_cpu(__h_m_s_ms);
 	opal_to_tm(y_m_d, h_m_s_ms, &tm);
-	return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
-		      tm.tm_hour, tm.tm_min, tm.tm_sec);
+	return rtc_tm_to_time64(&tm);
 }
 
 static __init int opal_time_init(void)
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
index 1809cfc562ee6..9bc68f9134663 100644
--- a/arch/powerpc/platforms/ps3/platform.h
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -57,7 +57,7 @@ static inline void ps3_smp_cleanup_cpu(int cpu) { }
 /* time */
 
 void __init ps3_calibrate_decr(void);
-unsigned long __init ps3_get_boot_time(void);
+time64_t __init ps3_get_boot_time(void);
 void ps3_get_rtc_time(struct rtc_time *time);
 int ps3_set_rtc_time(struct rtc_time *time);
 
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index 11b45b58c81bd..9dac125c997ea 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -76,7 +76,7 @@ static u64 read_rtc(void)
 	return rtc_val;
 }
 
-unsigned long __init ps3_get_boot_time(void)
+time64_t __init ps3_get_boot_time(void)
 {
 	return read_rtc() + ps3_os_area_get_rtc_diff();
 }

From 5235afa89a246c9d5ab35996bc38681c474c3ed7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Apr 2018 10:36:41 +0200
Subject: [PATCH 182/251] powerpc: use time64_t in update_persistent_clock

update_persistent_clock() is deprecated because it suffers from overflow
in 2038 on 32-bit architectures. This changes powerpc to use the
update_persistent_clock64() replacement, and to pass down 64-bit
timestamps consistently.

This is now simpler, as we no longer have to worry about the offset
numbers in tm_year and tm_mon that are different between the Linux
conventions and RTAS.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/time.c              |  6 ++----
 arch/powerpc/platforms/8xx/m8xx_setup.c |  7 +++----
 arch/powerpc/platforms/powermac/time.c  | 17 ++++-------------
 3 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index da06a4d84ae23..b025acb32bfea 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -811,16 +811,14 @@ void __init generic_calibrate_decr(void)
 	}
 }
 
-int update_persistent_clock(struct timespec now)
+int update_persistent_clock64(struct timespec64 now)
 {
 	struct rtc_time tm;
 
 	if (!ppc_md.set_rtc_time)
 		return -ENODEV;
 
-	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
-	tm.tm_year -= 1900;
-	tm.tm_mon -= 1;
+	rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 
 	return ppc_md.set_rtc_time(&tm);
 }
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index d76daa90647b6..027c42d8966cf 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -169,15 +169,14 @@ int mpc8xx_set_rtc_time(struct rtc_time *tm)
 {
 	sitk8xx_t __iomem *sys_tmr1;
 	sit8xx_t __iomem *sys_tmr2;
-	int time;
+	time64_t time;
 
 	sys_tmr1 = immr_map(im_sitk);
 	sys_tmr2 = immr_map(im_sit);
-	time = mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday,
-	              tm->tm_hour, tm->tm_min, tm->tm_sec);
+	time = rtc_tm_to_time64(tm);
 
 	out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY);
-	out_be32(&sys_tmr2->sit_rtc, time);
+	out_be32(&sys_tmr2->sit_rtc, (u32)time);
 	out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY);
 
 	immr_unmap(sys_tmr2);
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index d5d1c452038e5..7c968e46736fa 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -84,15 +84,6 @@ long __init pmac_time_init(void)
 	return delta;
 }
 
-#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) || \
-    defined(CONFIG_PMAC_SMU)
-static unsigned long from_rtc_time(struct rtc_time *tm)
-{
-	return mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday,
-		      tm->tm_hour, tm->tm_min, tm->tm_sec);
-}
-#endif
-
 #ifdef CONFIG_ADB_CUDA
 static time64_t cuda_get_time(void)
 {
@@ -115,10 +106,10 @@ static time64_t cuda_get_time(void)
 
 static int cuda_set_rtc_time(struct rtc_time *tm)
 {
-	unsigned int nowtime;
+	time64_t nowtime;
 	struct adb_request req;
 
-	nowtime = from_rtc_time(tm) + RTC_OFFSET;
+	nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET;
 	if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
 			 nowtime >> 24, nowtime >> 16, nowtime >> 8,
 			 nowtime) < 0)
@@ -158,10 +149,10 @@ static time64_t pmu_get_time(void)
 
 static int pmu_set_rtc_time(struct rtc_time *tm)
 {
-	unsigned int nowtime;
+	time64_t nowtime;
 	struct adb_request req;
 
-	nowtime = from_rtc_time(tm) + RTC_OFFSET;
+	nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET;
 	if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24,
 			nowtime >> 16, nowtime >> 8, nowtime) < 0)
 		return -ENXIO;

From 34efabe418953002d1c8e0ab28634929a9ddf433 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Apr 2018 10:36:42 +0200
Subject: [PATCH 183/251] powerpc: remove unused to_tm() helper

to_tm() is now completely unused, the only reference being in the
_dump_time() helper that is also unused. This removes both, leaving
the rest of the powerpc RTC code y2038 safe to as far as the hardware
supports.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/time.h   |  2 --
 arch/powerpc/kernel/time.c        | 50 -------------------------------
 arch/powerpc/platforms/ps3/time.c | 24 ---------------
 3 files changed, 76 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 69b89f9412526..b80d492ceb296 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -26,8 +26,6 @@ extern unsigned long tb_ticks_per_usec;
 extern unsigned long tb_ticks_per_sec;
 extern struct clock_event_device decrementer_clockevent;
 
-struct rtc_time;
-extern void to_tm(int tim, struct rtc_time * tm);
 
 extern void generic_calibrate_decr(void);
 extern void hdec_interrupt(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b025acb32bfea..2530cf60b8392 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1159,56 +1159,6 @@ void __init time_init(void)
 #endif
 }
 
-
-#define FEBRUARY	2
-#define	STARTOFTIME	1970
-#define SECDAY		86400L
-#define SECYR		(SECDAY * 365)
-#define	leapyear(year)		((year) % 4 == 0 && \
-				 ((year) % 100 != 0 || (year) % 400 == 0))
-#define	days_in_year(a) 	(leapyear(a) ? 366 : 365)
-#define	days_in_month(a) 	(month_days[(a) - 1])
-
-static int month_days[12] = {
-	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
-};
-
-void to_tm(int tim, struct rtc_time * tm)
-{
-	register int    i;
-	register long   hms, day;
-
-	day = tim / SECDAY;
-	hms = tim % SECDAY;
-
-	/* Hours, minutes, seconds are easy */
-	tm->tm_hour = hms / 3600;
-	tm->tm_min = (hms % 3600) / 60;
-	tm->tm_sec = (hms % 3600) % 60;
-
-	/* Number of years in days */
-	for (i = STARTOFTIME; day >= days_in_year(i); i++)
-		day -= days_in_year(i);
-	tm->tm_year = i;
-
-	/* Number of months in days left */
-	if (leapyear(tm->tm_year))
-		days_in_month(FEBRUARY) = 29;
-	for (i = 1; day >= days_in_month(i); i++)
-		day -= days_in_month(i);
-	days_in_month(FEBRUARY) = 28;
-	tm->tm_mon = i;
-
-	/* Days are what is left over (+1) from all that. */
-	tm->tm_mday = day + 1;
-
-	/*
-	 * No-one uses the day of the week.
-	 */
-	tm->tm_wday = -1;
-}
-EXPORT_SYMBOL(to_tm);
-
 /*
  * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
  * result.
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index 9dac125c997ea..08ca76e23d096 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -28,30 +28,6 @@
 
 #include "platform.h"
 
-#define dump_tm(_a) _dump_tm(_a, __func__, __LINE__)
-static void _dump_tm(const struct rtc_time *tm, const char* func, int line)
-{
-	pr_debug("%s:%d tm_sec  %d\n", func, line, tm->tm_sec);
-	pr_debug("%s:%d tm_min  %d\n", func, line, tm->tm_min);
-	pr_debug("%s:%d tm_hour %d\n", func, line, tm->tm_hour);
-	pr_debug("%s:%d tm_mday %d\n", func, line, tm->tm_mday);
-	pr_debug("%s:%d tm_mon  %d\n", func, line, tm->tm_mon);
-	pr_debug("%s:%d tm_year %d\n", func, line, tm->tm_year);
-	pr_debug("%s:%d tm_wday %d\n", func, line, tm->tm_wday);
-}
-
-#define dump_time(_a) _dump_time(_a, __func__, __LINE__)
-static void __maybe_unused _dump_time(int time, const char *func,
-	int line)
-{
-	struct rtc_time tm;
-
-	to_tm(time, &tm);
-
-	pr_debug("%s:%d time    %d\n", func, line, time);
-	_dump_tm(&tm, func, line);
-}
-
 void __init ps3_calibrate_decr(void)
 {
 	int result;

From 447808bf500a7cc92173266a59f8a494e132b122 Mon Sep 17 00:00:00 2001
From: Stewart Smith <stewart@linux.ibm.com>
Date: Thu, 29 Mar 2018 17:02:46 +1100
Subject: [PATCH 184/251] hvc_opal: don't set tb_ticks_per_usec in
 udbg_init_opal_common()

time_init() will set up tb_ticks_per_usec based on reality.
time_init() is called *after* udbg_init_opal_common() during boot.

from arch/powerpc/kernel/time.c:
  unsigned long tb_ticks_per_usec = 100; /* sane default */

Currently, all powernv systems have a timebase frequency of 512mhz
(512000000/1000000 == 0x200) - although there's nothing written
down anywhere that I can find saying that we couldn't make that
different based on the requirements in the ISA.

So, we've been (accidentally) thwacking the (currently) correct
(for powernv at least) value for tb_ticks_per_usec earlier than
we otherwise would have.

The "sane default" seems to be adequate for our purposes between
udbg_init_opal_common() and time_init() being called, and if it isn't,
then we should probably be setting it somewhere that isn't hvc_opal.c!

Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/tty/hvc/hvc_opal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index 2ed07ca6389e5..9645c0062a900 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -318,7 +318,6 @@ static void udbg_init_opal_common(void)
 	udbg_putc = udbg_opal_putc;
 	udbg_getc = udbg_opal_getc;
 	udbg_getc_poll = udbg_opal_getc_poll;
-	tb_ticks_per_usec = 0x200; /* Make udelay not suck */
 }
 
 void __init hvc_opal_init_early(void)

From 8a792262f320245de0174e6bcb551312f2e2debe Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Fri, 11 May 2018 18:03:13 +1000
Subject: [PATCH 185/251] powerpc/xive: Remove (almost) unused macros

The GETFIELD and SETFIELD macros in xive-regs.h aren't used except for
a single instance of GETFIELD, so replace that and remove them.

These macros are also defined in vas.h, so either those should be
eventually replaced or the macros moved into bitops.h.

Signed-off-by: Russell Currey <ruscur@russell.cc>
[mpe: Rewrite the assignment to 'he' to avoid ffs() etc.]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/xive-regs.h | 6 ------
 arch/powerpc/sysdev/xive/native.c    | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index fa4288822b681..6de989f8defd4 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -123,10 +123,4 @@
 #define TM_QW3_NSR_I		PPC_BIT8(2)
 #define TM_QW3_NSR_GRP_LVL	PPC_BIT8(3,7)
 
-/* Utilities to manipulate these (originaly from OPAL) */
-#define MASK_TO_LSH(m)		(__builtin_ffsl(m) - 1)
-#define GETFIELD(m, v)		(((v) & (m)) >> MASK_TO_LSH(m))
-#define SETFIELD(m, v, val)				\
-	(((v) & ~(m)) |	((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
-
 #endif /* _ASM_POWERPC_XIVE_REGS_H */
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index b48454be5b982..83bcd72b21cf7 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -341,7 +341,7 @@ static void xive_native_update_pending(struct xive_cpu *xc)
 	 * of the hypervisor interrupt (if any)
 	 */
 	cppr = ack & 0xff;
-	he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
+	he = (ack >> 8) >> 6;
 	switch(he) {
 	case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
 		break;

From c5cbde2df3951c59dc099dd2e452b534cabd0eb8 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Sat, 5 May 2018 00:01:25 -0300
Subject: [PATCH 186/251] powerpc: cpm_gpio: Remove owner assignment from
 platform_driver

Structure platform_driver does not need to set the owner field, as this
will be populated by the driver core.

Generated by scripts/coccinelle/api/platform_no_drv_owner.cocci.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/cpm_gpio.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
index 0badc90be6668..0695d26bd301d 100644
--- a/arch/powerpc/sysdev/cpm_gpio.c
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -63,7 +63,6 @@ static struct platform_driver cpm_gpio_driver = {
 	.probe		= cpm_gpio_probe,
 	.driver		= {
 		.name	= "cpm-gpio",
-		.owner	= THIS_MODULE,
 		.of_match_table	= cpm_gpio_match,
 	},
 };

From 589b1f7e4b0db4c31cef3b55f7514857bfc4b093 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 25 Apr 2018 19:27:07 +0800
Subject: [PATCH 187/251] powerpc/xics: Add missing of_node_put() in error path

The device node obtained with of_find_compatible_node() should be
released by calling of_node_put().  But it was not released when
of_get_property() failed.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
[mpe: Invert the sense of the if so we only need one return path]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/xics/xics-common.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 77e864d5506d3..f87470319d71e 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -446,10 +446,11 @@ static void __init xics_get_server_size(void)
 	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
 	if (!np)
 		return;
+
 	isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-	if (!isize)
-		return;
-	xics_interrupt_server_size = be32_to_cpu(*isize);
+	if (isize)
+		xics_interrupt_server_size = be32_to_cpu(*isize);
+
 	of_node_put(np);
 }
 

From cb094fa5af7c9623084aa4c3cf529b196f5c3b5c Mon Sep 17 00:00:00 2001
From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Date: Tue, 22 May 2018 14:42:34 +0530
Subject: [PATCH 188/251] powerpc/perf: Rearrange memory freeing in imc init

When any of the IMC (In-Memory Collection counter) devices fail
to initialize, imc_common_mem_free() frees set of memory. In doing so,
pmu_ptr pointer is also freed. But pmu_ptr pointer is used in subsequent
function (imc_common_cpuhp_mem_free()) which is wrong. Patch here reorders
the code to avoid such access.

Also free the memory which is dynamically allocated during imc
initialization, wherever required.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/imc-pmu.c               | 32 ++++++++++++-----------
 arch/powerpc/platforms/powernv/opal-imc.c | 13 ++++++---
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 75fb23c24ee8d..c1665fff3a483 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1153,7 +1153,7 @@ static void cleanup_all_core_imc_memory(void)
 	/* mem_info will never be NULL */
 	for (i = 0; i < nr_cores; i++) {
 		if (ptr[i].vbase)
-			free_pages((u64)ptr->vbase, get_order(size));
+			free_pages((u64)ptr[i].vbase, get_order(size));
 	}
 
 	kfree(ptr);
@@ -1191,7 +1191,6 @@ static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
 	if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
-	kfree(pmu_ptr);
 }
 
 /*
@@ -1208,6 +1207,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
 			kfree(nest_imc_refc);
 			kfree(per_nest_pmu_arr);
+			per_nest_pmu_arr = NULL;
 		}
 
 		if (nest_pmus > 0)
@@ -1319,10 +1319,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 	int ret;
 
 	ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
-	if (ret) {
-		imc_common_mem_free(pmu_ptr);
-		return ret;
-	}
+	if (ret)
+		goto err_free_mem;
 
 	switch (pmu_ptr->domain) {
 	case IMC_DOMAIN_NEST:
@@ -1337,7 +1335,9 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 			ret = init_nest_pmu_ref();
 			if (ret) {
 				mutex_unlock(&nest_init_lock);
-				goto err_free;
+				kfree(per_nest_pmu_arr);
+				per_nest_pmu_arr = NULL;
+				goto err_free_mem;
 			}
 			/* Register for cpu hotplug notification. */
 			ret = nest_pmu_cpumask_init();
@@ -1345,7 +1345,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 				mutex_unlock(&nest_init_lock);
 				kfree(nest_imc_refc);
 				kfree(per_nest_pmu_arr);
-				goto err_free;
+				per_nest_pmu_arr = NULL;
+				goto err_free_mem;
 			}
 		}
 		nest_pmus++;
@@ -1355,7 +1356,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 		ret = core_imc_pmu_cpumask_init();
 		if (ret) {
 			cleanup_all_core_imc_memory();
-			return ret;
+			goto err_free_mem;
 		}
 
 		break;
@@ -1363,7 +1364,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 		ret = thread_imc_cpu_init();
 		if (ret) {
 			cleanup_all_thread_imc_memory();
-			return ret;
+			goto err_free_mem;
 		}
 
 		break;
@@ -1373,23 +1374,24 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 
 	ret = update_events_in_group(parent, pmu_ptr);
 	if (ret)
-		goto err_free;
+		goto err_free_cpuhp_mem;
 
 	ret = update_pmu_ops(pmu_ptr);
 	if (ret)
-		goto err_free;
+		goto err_free_cpuhp_mem;
 
 	ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
 	if (ret)
-		goto err_free;
+		goto err_free_cpuhp_mem;
 
 	pr_info("%s performance monitor hardware support registered\n",
 							pmu_ptr->pmu.name);
 
 	return 0;
 
-err_free:
-	imc_common_mem_free(pmu_ptr);
+err_free_cpuhp_mem:
 	imc_common_cpuhp_mem_free(pmu_ptr);
+err_free_mem:
+	imc_common_mem_free(pmu_ptr);
 	return ret;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 2a14fda5ea260..490bb721879fb 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -115,8 +115,10 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 		return -ENOMEM;
 
 	chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
-	if (!chipid_arr)
+	if (!chipid_arr) {
+		kfree(base_addr_arr);
 		return -ENOMEM;
+	}
 
 	if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips))
 		goto error;
@@ -143,7 +145,6 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 	return 0;
 
 error:
-	kfree(pmu_ptr->mem_info);
 	kfree(base_addr_arr);
 	kfree(chipid_arr);
 	return -1;
@@ -183,8 +184,14 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
 
 	/* Function to register IMC pmu */
 	ret = init_imc_pmu(parent, pmu_ptr, pmu_index);
-	if (ret)
+	if (ret) {
 		pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+		kfree(pmu_ptr->pmu.name);
+		if (pmu_ptr->domain == IMC_DOMAIN_NEST)
+			kfree(pmu_ptr->mem_info);
+		kfree(pmu_ptr);
+		return ret;
+	}
 
 	return 0;
 

From b41bb28b9ea22e7c22a95456c085e5b943c3fe8d Mon Sep 17 00:00:00 2001
From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Date: Tue, 22 May 2018 14:42:35 +0530
Subject: [PATCH 189/251] powerpc/perf: Replace the direct return with goto
 statement

Replace the direct return statement in imc_mem_init() with goto, to adhere
to the kernel coding style.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/imc-pmu.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index c1665fff3a483..61000d174dbdf 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1236,7 +1236,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 								int pmu_index)
 {
 	const char *s;
-	int nr_cores, cpu, res;
+	int nr_cores, cpu, res = -ENOMEM;
 
 	if (of_property_read_string(parent, "name", &s))
 		return -ENODEV;
@@ -1246,7 +1246,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 		/* Update the pmu name */
 		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s);
 		if (!pmu_ptr->pmu.name)
-			return -ENOMEM;
+			goto err;
 
 		/* Needed for hotplug/migration */
 		if (!per_nest_pmu_arr) {
@@ -1254,7 +1254,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 						sizeof(struct imc_pmu *),
 						GFP_KERNEL);
 			if (!per_nest_pmu_arr)
-				return -ENOMEM;
+				goto err;
 		}
 		per_nest_pmu_arr[pmu_index] = pmu_ptr;
 		break;
@@ -1262,21 +1262,21 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 		/* Update the pmu name */
 		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
 		if (!pmu_ptr->pmu.name)
-			return -ENOMEM;
+			goto err;
 
 		nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
 		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
 								GFP_KERNEL);
 
 		if (!pmu_ptr->mem_info)
-			return -ENOMEM;
+			goto err;
 
 		core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
 								GFP_KERNEL);
 
 		if (!core_imc_refc) {
 			kfree(pmu_ptr->mem_info);
-			return -ENOMEM;
+			goto err;
 		}
 
 		core_imc_pmu = pmu_ptr;
@@ -1285,14 +1285,14 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 		/* Update the pmu name */
 		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
 		if (!pmu_ptr->pmu.name)
-			return -ENOMEM;
+			goto err;
 
 		thread_imc_mem_size = pmu_ptr->counter_mem_size;
 		for_each_online_cpu(cpu) {
 			res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
 			if (res) {
 				cleanup_all_thread_imc_memory();
-				return res;
+				goto err;
 			}
 		}
 
@@ -1302,6 +1302,8 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 	}
 
 	return 0;
+err:
+	return res;
 }
 
 /*

From e7a8ac433845aa86f39bf404ae4b7709f21c7d70 Mon Sep 17 00:00:00 2001
From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Date: Tue, 22 May 2018 14:42:36 +0530
Subject: [PATCH 190/251] powerpc/perf: Return appropriate value for unknown
 domain

Return proper error code for unknown domain during IMC initialization.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/imc-pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 61000d174dbdf..f563d30292ade 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1371,7 +1371,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 
 		break;
 	default:
-		return  -1;	/* Unknown domain */
+		return  -EINVAL;	/* Unknown domain */
 	}
 
 	ret = update_events_in_group(parent, pmu_ptr);

From 25af86b2aeb60dd52bd18a875124490e23bd4515 Mon Sep 17 00:00:00 2001
From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Date: Tue, 22 May 2018 14:42:37 +0530
Subject: [PATCH 191/251] powerpc/perf: Unregister thread-imc if core-imc not
 supported

Since thread-imc internally use the core-imc hardware infrastructure
and is depended on it, having thread-imc in the kernel in the
absence of core-imc is trivial. Patch disables thread-imc, if
core-imc is not registered.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/imc-pmu.h        |  1 +
 arch/powerpc/perf/imc-pmu.c               | 12 ++++++++++++
 arch/powerpc/platforms/powernv/opal-imc.c |  9 +++++++++
 3 files changed, 22 insertions(+)

diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index d76cb11be3e3f..69f516ecb2fde 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -128,4 +128,5 @@ extern int init_imc_pmu(struct device_node *parent,
 				struct imc_pmu *pmu_ptr, int pmu_id);
 extern void thread_imc_disable(void);
 extern int get_max_nest_dev(void);
+extern void unregister_thread_imc(void);
 #endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f563d30292ade..d1977b61f8271 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -40,6 +40,7 @@ static struct imc_pmu *core_imc_pmu;
 /* Thread IMC data structures and variables */
 
 static DEFINE_PER_CPU(u64 *, thread_imc_mem);
+static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -1228,6 +1229,16 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 	}
 }
 
+/*
+ * Function to unregister thread-imc if core-imc
+ * is not registered.
+ */
+void unregister_thread_imc(void)
+{
+	imc_common_cpuhp_mem_free(thread_imc_pmu);
+	imc_common_mem_free(thread_imc_pmu);
+	perf_pmu_unregister(&thread_imc_pmu->pmu);
+}
 
 /*
  * imc_mem_init : Function to support memory allocation for core imc.
@@ -1296,6 +1307,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 			}
 		}
 
+		thread_imc_pmu = pmu_ptr;
 		break;
 	default:
 		return -EINVAL;
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 490bb721879fb..58a07948c76e7 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -255,6 +255,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 {
 	struct device_node *imc_dev = pdev->dev.of_node;
 	int pmu_count = 0, domain;
+	bool core_imc_reg = false, thread_imc_reg = false;
 	u32 type;
 
 	/*
@@ -292,6 +293,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 		if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
 			if (domain == IMC_DOMAIN_NEST)
 				pmu_count++;
+			if (domain == IMC_DOMAIN_CORE)
+				core_imc_reg = true;
+			if (domain == IMC_DOMAIN_THREAD)
+				thread_imc_reg = true;
 		}
 	}
 
@@ -299,6 +304,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 	if (pmu_count == 0)
 		debugfs_remove_recursive(imc_debugfs_parent);
 
+	/* If core imc is not registered, unregister thread-imc */
+	if (!core_imc_reg && thread_imc_reg)
+		unregister_thread_imc();
+
 	return 0;
 }
 

From 796b9f5b317a46d1b744f661c38a62b1280a6ab7 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:28 +1000
Subject: [PATCH 192/251] powerpc/eeh: Add final message for successful
 recovery

Add a single log line at the end of successful EEH recovery, so that
it's clear that event processing has finished.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b8a329f048141..07e0a42035ceb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -778,14 +778,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	eeh_pe_update_time_stamp(pe);
 	pe->freeze_count++;
 	if (pe->freeze_count > eeh_max_freezes) {
-		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
-		       "last hour and has been permanently disabled.\n",
+		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
 		       pe->phb->global_number, pe->addr,
 		       pe->freeze_count);
 		goto hard_fail;
 	}
-	pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
-		pe->freeze_count);
+	pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+		pe->freeze_count, eeh_max_freezes);
 
 	/* Walk the various device drivers attached to this slot through
 	 * a reset sequence, giving each an opportunity to do what it needs
@@ -911,6 +910,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
+	pr_info("EEH: Recovery successful.\n");
 	goto final;
 
 hard_fail:

From 46d4be41b987a6b2d25a2ebdd94cafb44e21d6c5 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:30 +1000
Subject: [PATCH 193/251] powerpc/eeh: Fix use-after-release of EEH driver

Correct two cases where eeh_pcid_get() is used to reference the driver's
module but the reference is dropped before the driver pointer is used.

In eeh_rmv_device() also refactor a little so that only two calls to
eeh_pcid_put() are needed, rather than three and the reference isn't
taken at all if it wasn't needed.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 07e0a42035ceb..54333f6c9d67d 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -458,9 +458,11 @@ static void *eeh_add_virt_device(void *data, void *userdata)
 
 	driver = eeh_pcid_get(dev);
 	if (driver) {
-		eeh_pcid_put(dev);
-		if (driver->err_handler)
+		if (driver->err_handler) {
+			eeh_pcid_put(dev);
 			return NULL;
+		}
+		eeh_pcid_put(dev);
 	}
 
 #ifdef CONFIG_PCI_IOV
@@ -497,17 +499,19 @@ static void *eeh_rmv_device(void *data, void *userdata)
 	if (eeh_dev_removed(edev))
 		return NULL;
 
-	driver = eeh_pcid_get(dev);
-	if (driver) {
-		eeh_pcid_put(dev);
-		if (removed &&
-		    eeh_pe_passed(edev->pe))
-			return NULL;
-		if (removed &&
-		    driver->err_handler &&
-		    driver->err_handler->error_detected &&
-		    driver->err_handler->slot_reset)
+	if (removed) {
+		if (eeh_pe_passed(edev->pe))
 			return NULL;
+		driver = eeh_pcid_get(dev);
+		if (driver) {
+			if (driver->err_handler &&
+			    driver->err_handler->error_detected &&
+			    driver->err_handler->slot_reset) {
+				eeh_pcid_put(dev);
+				return NULL;
+			}
+			eeh_pcid_put(dev);
+		}
 	}
 
 	/* Remove it from PCI subsystem */

From a0bd54641be9df3a84f693035fbc2c31f7ca644e Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:31 +1000
Subject: [PATCH 194/251] powerpc/eeh: Remove unused eeh_pcid_name()

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 54333f6c9d67d..ca9a73fe9cc54 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -39,20 +39,6 @@ struct eeh_rmv_data {
 	int removed;
 };
 
-/**
- * eeh_pcid_name - Retrieve name of PCI device driver
- * @pdev: PCI device
- *
- * This routine is used to retrieve the name of PCI device driver
- * if that's valid.
- */
-static inline const char *eeh_pcid_name(struct pci_dev *pdev)
-{
-	if (pdev && pdev->dev.driver)
-		return pdev->dev.driver->name;
-	return "";
-}
-
 /**
  * eeh_pcid_get - Get the PCI device driver
  * @pdev: PCI device

From d6c4932fbf2421bfd7f8fe75baa4ccef4a845f79 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:32 +1000
Subject: [PATCH 195/251] powerpc/eeh: Strengthen types of eeh traversal
 functions

The traversal functions eeh_pe_traverse() and eeh_pe_dev_traverse()
both provide their first argument as void * but every single user casts
it to the expected type.

Change the type of the first parameter from void * to the appropriate
type, and clean up all uses.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/eeh.h   |  7 ++++---
 arch/powerpc/kernel/eeh.c        | 13 +++++--------
 arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++--------------------
 arch/powerpc/kernel/eeh_pe.c     | 19 +++++++------------
 4 files changed, 26 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c2266ca61853c..f02e0400e6f2e 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -262,7 +262,8 @@ static inline bool eeh_state_active(int state)
 	== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 }
 
-typedef void *(*eeh_traverse_func)(void *data, void *flag);
+typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
+typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
@@ -272,9 +273,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
 void *eeh_pe_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag);
+		      eeh_pe_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag);
+			  eeh_edev_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
 const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index bc640e4c5ca5a..f82dade4fb9a3 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -263,9 +263,8 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 	return n;
 }
 
-static void *eeh_dump_pe_log(void *data, void *flag)
+static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = data;
 	struct eeh_dev *edev, *tmp;
 	size_t *plen = flag;
 
@@ -686,9 +685,9 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
 	return rc;
 }
 
-static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
+static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+					    void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct pci_dev *dev = userdata;
 
@@ -714,9 +713,8 @@ static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_restore_dev_state(void *data, void *userdata)
+static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct pci_dev *dev = userdata;
@@ -856,11 +854,10 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
  * the indicated device and its children so that the bunch of the
  * devices could be reset properly.
  */
-static void *eeh_set_dev_freset(void *data, void *flag)
+static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
 {
 	struct pci_dev *dev;
 	unsigned int *freset = (unsigned int *)flag;
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 
 	dev = eeh_dev_to_pci_dev(edev);
 	if (dev)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index ca9a73fe9cc54..188d15c4fe3a7 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -149,9 +149,8 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
 	return false;
 }
 
-static void *eeh_dev_save_state(void *data, void *userdata)
+static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dev *pdev;
 
 	if (!edev)
@@ -184,9 +183,8 @@ static void *eeh_dev_save_state(void *data, void *userdata)
  * merge the device driver responses. Cumulative response
  * passed back in "userdata".
  */
-static void *eeh_report_error(void *data, void *userdata)
+static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
@@ -231,9 +229,8 @@ static void *eeh_report_error(void *data, void *userdata)
  * are now enabled. Collects up and merges the device driver responses.
  * Cumulative response passed back in "userdata".
  */
-static void *eeh_report_mmio_enabled(void *data, void *userdata)
+static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
@@ -273,9 +270,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
  * some actions, usually to save data the driver needs so that the
  * driver can work again while the device is recovered.
  */
-static void *eeh_report_reset(void *data, void *userdata)
+static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
@@ -310,9 +306,8 @@ static void *eeh_report_reset(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_dev_restore_state(void *data, void *userdata)
+static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = data;
 	struct pci_dev *pdev;
 
 	if (!edev)
@@ -348,9 +343,8 @@ static void *eeh_dev_restore_state(void *data, void *userdata)
  * could resume so that the device driver can do some initialization
  * to make the recovered device work again.
  */
-static void *eeh_report_resume(void *data, void *userdata)
+static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	bool was_in_error;
 	struct pci_driver *driver;
@@ -397,9 +391,8 @@ static void *eeh_report_resume(void *data, void *userdata)
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-static void *eeh_report_failure(void *data, void *userdata)
+static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct pci_driver *driver;
 
@@ -457,10 +450,9 @@ static void *eeh_add_virt_device(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_rmv_device(void *data, void *userdata)
+static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 {
 	struct pci_driver *driver;
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
 	int *removed = rmv_data ? &rmv_data->removed : NULL;
@@ -532,9 +524,8 @@ static void *eeh_rmv_device(void *data, void *userdata)
 	return NULL;
 }
 
-static void *eeh_pe_detach_dev(void *data, void *userdata)
+static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	struct eeh_dev *edev, *tmp;
 
 	eeh_pe_for_each_dev(pe, edev, tmp) {
@@ -555,9 +546,8 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
  * PE reset (for 3 times), we try to clear the frozen state
  * for 3 times as well.
  */
-static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
+static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	bool clear_sw_state = *(bool *)flag;
 	int i, rc = 1;
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index ee5a67d57aab8..38a4bcd8ed13e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -173,7 +173,7 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
  * to be traversed.
  */
 void *eeh_pe_traverse(struct eeh_pe *root,
-		      eeh_traverse_func fn, void *flag)
+		      eeh_pe_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
 	void *ret;
@@ -196,7 +196,7 @@ void *eeh_pe_traverse(struct eeh_pe *root,
  * PE and its child PEs.
  */
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag)
+			  eeh_edev_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
 	struct eeh_dev *edev, *tmp;
@@ -235,9 +235,8 @@ struct eeh_pe_get_flag {
 	int config_addr;
 };
 
-static void *__eeh_pe_get(void *data, void *flag)
+static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
 
 	/* Unexpected PHB PE */
@@ -551,9 +550,8 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe)
  * PE. Also, the associated PCI devices will be put into IO frozen
  * state as well.
  */
-static void *__eeh_pe_state_mark(void *data, void *flag)
+static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
@@ -595,9 +593,8 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
 
-static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
 {
-	struct eeh_dev *edev = data;
 	int mode = *((int *)flag);
 
 	edev->mode |= mode;
@@ -625,9 +622,8 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
  * given PE. Besides, we also clear the check count of the PE
  * as well.
  */
-static void *__eeh_pe_state_clear(void *data, void *flag)
+static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag)
 {
-	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
@@ -858,9 +854,8 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
  * the expansion ROM base address, the latency timer, and etc.
  * from the saved values in the device node.
  */
-static void *eeh_restore_one_device_bars(void *data, void *flag)
+static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
 {
-	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	/* Do special restore for bridges */

From 2eae39f29b10038601ddc36dae346cd79c96faa1 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:33 +1000
Subject: [PATCH 196/251] powerpc/eeh: Add message when PE processing at parent

To aid debugging, add a message to show when EEH processing for a PE
will be done at the device's parent, rather than directly at the
device.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f82dade4fb9a3..1139821a9aec7 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -541,8 +541,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
 		/* Frozen parent PE ? */
 		ret = eeh_ops->get_state(parent_pe, NULL);
-		if (ret > 0 && !eeh_state_active(ret))
+		if (ret > 0 && !eeh_state_active(ret)) {
 			pe = parent_pe;
+			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
+			       pe->phb->global_number, pe->addr,
+			       pe->phb->global_number, parent_pe->addr);
+		}
 
 		/* Next parent level */
 		parent_pe = parent_pe->parent;

From 30424e386a30d1160a0fdf47beafe8b116d0a8f7 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:34 +1000
Subject: [PATCH 197/251] powerpc/eeh: Clean up pci_ers_result handling

As EEH event handling progresses, a cumulative result of type
pci_ers_result is built up by (some of) the eeh_report_*() functions
using either:
	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
or:
	if ((*res == PCI_ERS_RESULT_NONE) ||
	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
	if (*res == PCI_ERS_RESULT_DISCONNECT &&
	    rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
(Where *res is the accumulator.)

However, the intent is not immediately clear and the result in some
situations is order dependent.

Address this by assigning a priority to each result value, and always
merging to the highest priority. This renders the intent clear, and
provides a stable value for all orderings.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
[mpe: Minor formatting (clang-format)]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 42 ++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 188d15c4fe3a7..ea51c909f8c99 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -39,6 +39,35 @@ struct eeh_rmv_data {
 	int removed;
 };
 
+static int eeh_result_priority(enum pci_ers_result result)
+{
+	switch (result) {
+	case PCI_ERS_RESULT_NONE:
+		return 1;
+	case PCI_ERS_RESULT_NO_AER_DRIVER:
+		return 2;
+	case PCI_ERS_RESULT_RECOVERED:
+		return 3;
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		return 4;
+	case PCI_ERS_RESULT_DISCONNECT:
+		return 5;
+	case PCI_ERS_RESULT_NEED_RESET:
+		return 6;
+	default:
+		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
+		return 0;
+	}
+};
+
+static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
+						enum pci_ers_result new)
+{
+	if (eeh_result_priority(new) > eeh_result_priority(old))
+		return new;
+	return old;
+}
+
 /**
  * eeh_pcid_get - Get the PCI device driver
  * @pdev: PCI device
@@ -206,9 +235,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 
 	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
-	/* A driver that needs a reset trumps all others */
-	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	*res = pci_ers_merge_result(*res, rc);
 
 	edev->in_error = true;
 	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
@@ -249,9 +276,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 
 	rc = driver->err_handler->mmio_enabled(dev);
 
-	/* A driver that needs a reset trumps all others */
-	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	*res = pci_ers_merge_result(*res, rc);
 
 out:
 	eeh_pcid_put(dev);
@@ -294,10 +319,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 		goto out;
 
 	rc = driver->err_handler->slot_reset(dev);
-	if ((*res == PCI_ERS_RESULT_NONE) ||
-	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
-	if (*res == PCI_ERS_RESULT_DISCONNECT &&
-	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+	*res = pci_ers_merge_result(*res, rc);
 
 out:
 	eeh_pcid_put(dev);

From 309ed3a7157a50edeeddfe49bd527a7347f76237 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:35 +1000
Subject: [PATCH 198/251] powerpc/eeh: Introduce eeh_for_each_pe()

Add a for_each-style macro for iterating through PEs without the
boilerplate required by a traversal function. eeh_pe_next() is now
exported, as it is now used directly in place.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/eeh.h | 4 ++++
 arch/powerpc/kernel/eeh_pe.c   | 7 +++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f02e0400e6f2e..677102baf3cdb 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -106,6 +106,9 @@ struct eeh_pe {
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
 		list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
+#define eeh_for_each_pe(root, pe) \
+	for (pe = root; pe; pe = eeh_pe_next(pe, root))
+
 static inline bool eeh_pe_passed(struct eeh_pe *pe)
 {
 	return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
@@ -267,6 +270,7 @@ typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
 struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
 			  int pe_no, int config_addr);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 38a4bcd8ed13e..1b238ecc553e2 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -142,8 +142,7 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
  * The function is used to retrieve the next PE in the
  * hierarchy PE tree.
  */
-static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
-				  struct eeh_pe *root)
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root)
 {
 	struct list_head *next = pe->child_list.next;
 
@@ -178,7 +177,7 @@ void *eeh_pe_traverse(struct eeh_pe *root,
 	struct eeh_pe *pe;
 	void *ret;
 
-	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+	eeh_for_each_pe(root, pe) {
 		ret = fn(pe, flag);
 		if (ret) return ret;
 	}
@@ -209,7 +208,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 	}
 
 	/* Traverse root PE */
-	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+	eeh_for_each_pe(root, pe) {
 		eeh_pe_for_each_dev(pe, edev, tmp) {
 			ret = fn(edev, flag);
 			if (ret)

From e2b810d51b2b36d41e3e5522e12cc752e0d865ec Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:36 +1000
Subject: [PATCH 199/251] powerpc/eeh: Introduce eeh_edev_actionable()

The same test is done in every EEH report function, so factor it out.

Since eeh_dev_removed() needs to be moved higher up in the file,
simplify it a little while we're at it.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index ea51c909f8c99..127f2bb41e389 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -68,6 +68,17 @@ static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
 	return old;
 }
 
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+	return !edev || (edev->mode & EEH_DEV_REMOVED);
+}
+
+static bool eeh_edev_actionable(struct eeh_dev *edev)
+{
+	return (edev->pdev && !eeh_dev_removed(edev) &&
+		!eeh_pe_passed(edev->pe));
+}
+
 /**
  * eeh_pcid_get - Get the PCI device driver
  * @pdev: PCI device
@@ -169,15 +180,6 @@ static void eeh_enable_irq(struct pci_dev *dev)
 	}
 }
 
-static bool eeh_dev_removed(struct eeh_dev *edev)
-{
-	/* EEH device removed ? */
-	if (!edev || (edev->mode & EEH_DEV_REMOVED))
-		return true;
-
-	return false;
-}
-
 static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 {
 	struct pci_dev *pdev;
@@ -218,7 +220,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -262,7 +264,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -301,7 +303,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -371,7 +373,7 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 	bool was_in_error;
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);
@@ -418,7 +420,7 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	struct pci_driver *driver;
 
-	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
+	if (!eeh_edev_actionable(edev))
 		return NULL;
 
 	device_lock(&dev->dev);

From 47cc8c1cc2f2d9889e84d59cbbe8cb1cc6e24ed1 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:37 +1000
Subject: [PATCH 200/251] powerpc/eeh: Introduce eeh_set_channel_state()

To ease future refactoring, extract setting of the channel state
from the report functions out into their own functions. This increases
the amount of code that is identical across all of the report
functions.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 127f2bb41e389..52b5acdab0f3f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -205,6 +205,17 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 	return NULL;
 }
 
+static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_for_each_pe(root, pe)
+		eeh_pe_for_each_dev(pe, edev, tmp)
+			if (eeh_edev_actionable(edev))
+				edev->pdev->error_state = s;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -224,7 +235,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 		return NULL;
 
 	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_frozen;
 
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
@@ -307,7 +317,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 		return NULL;
 
 	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
@@ -377,7 +386,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 		return NULL;
 
 	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
@@ -801,6 +809,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	 * hotplug for this case.
 	 */
 	pr_info("EEH: Notify device drivers to shutdown\n");
+	eeh_set_channel_state(pe, pci_channel_io_frozen);
 	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 	if ((pe->type & EEH_PE_PHB) &&
 	    result != PCI_ERS_RESULT_NONE &&
@@ -891,6 +900,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_info("EEH: Notify device drivers "
 			"the completion of reset\n");
 		result = PCI_ERS_RESULT_NONE;
+		eeh_set_channel_state(pe, pci_channel_io_normal);
 		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
 	}
 
@@ -912,6 +922,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 
 	/* Tell all device drivers that they can resume operations */
 	pr_info("EEH: Notify device driver to resume\n");
+	eeh_set_channel_state(pe, pci_channel_io_normal);
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
 	pr_info("EEH: Recovery successful.\n");
@@ -930,6 +941,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	eeh_slot_error_detail(pe, EEH_LOG_PERM);
 
 	/* Notify all devices that they're about to go down. */
+	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
 	/* Mark the PE to be removed permanently */
@@ -1039,6 +1051,7 @@ void eeh_handle_special_event(void)
 
 				/* Notify all devices to be down */
 				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+				eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 				eeh_pe_dev_traverse(pe,
 					eeh_report_failure, NULL);
 				bus = eeh_pe_bus_get(phb_pe);

From 010acfa1a76679174a0d8732965d76ae8a8531f7 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:38 +1000
Subject: [PATCH 201/251] powerpc/eeh: Introduce eeh_set_irq_state()

To ease future refactoring, extract calls to eeh_enable_irq() and
eeh_disable_irq() from the various report functions. This makes
the report functions initial sequences more similar, as well as making
the IRQ changes visible when reading eeh_handle_normal_event().

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 52 +++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 52b5acdab0f3f..7b0670b03a971 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -124,22 +124,20 @@ static inline void eeh_pcid_put(struct pci_dev *pdev)
  * do real work because EEH should freeze DMA transfers for those PCI
  * devices encountering EEH errors, which includes MSI or MSI-X.
  */
-static void eeh_disable_irq(struct pci_dev *dev)
+static void eeh_disable_irq(struct eeh_dev *edev)
 {
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
 	/* Don't disable MSI and MSI-X interrupts. They are
 	 * effectively disabled by the DMA Stopped state
 	 * when an EEH error occurs.
 	 */
-	if (dev->msi_enabled || dev->msix_enabled)
+	if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
 		return;
 
-	if (!irq_has_action(dev->irq))
+	if (!irq_has_action(edev->pdev->irq))
 		return;
 
 	edev->mode |= EEH_DEV_IRQ_DISABLED;
-	disable_irq_nosync(dev->irq);
+	disable_irq_nosync(edev->pdev->irq);
 }
 
 /**
@@ -149,10 +147,8 @@ static void eeh_disable_irq(struct pci_dev *dev)
  * This routine must be called to enable interrupt while failed
  * device could be resumed.
  */
-static void eeh_enable_irq(struct pci_dev *dev)
+static void eeh_enable_irq(struct eeh_dev *edev)
 {
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
 		/*
@@ -175,8 +171,8 @@ static void eeh_enable_irq(struct pci_dev *dev)
 		 *
 		 *	tglx
 		 */
-		if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
-			enable_irq(dev->irq);
+		if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
+			enable_irq(edev->pdev->irq);
 	}
 }
 
@@ -216,6 +212,29 @@ static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
 				edev->pdev->error_state = s;
 }
 
+static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_for_each_pe(root, pe) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
+			if (!eeh_edev_actionable(edev))
+				continue;
+
+			if (!eeh_pcid_get(edev->pdev))
+				continue;
+
+			if (enable)
+				eeh_enable_irq(edev);
+			else
+				eeh_disable_irq(edev);
+
+			eeh_pcid_put(edev->pdev);
+		}
+	}
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -239,8 +258,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
 
-	eeh_disable_irq(dev);
-
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
 		goto out;
@@ -321,8 +338,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
 
-	eeh_enable_irq(dev);
-
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset ||
 	    (edev->mode & EEH_DEV_NO_HANDLER) ||
@@ -392,7 +407,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 
 	was_in_error = edev->in_error;
 	edev->in_error = false;
-	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume ||
@@ -437,8 +451,6 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) goto out_no_dev;
 
-	eeh_disable_irq(dev);
-
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
 		goto out;
@@ -810,6 +822,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	 */
 	pr_info("EEH: Notify device drivers to shutdown\n");
 	eeh_set_channel_state(pe, pci_channel_io_frozen);
+	eeh_set_irq_state(pe, false);
 	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 	if ((pe->type & EEH_PE_PHB) &&
 	    result != PCI_ERS_RESULT_NONE &&
@@ -901,6 +914,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 			"the completion of reset\n");
 		result = PCI_ERS_RESULT_NONE;
 		eeh_set_channel_state(pe, pci_channel_io_normal);
+		eeh_set_irq_state(pe, true);
 		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
 	}
 
@@ -923,6 +937,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	/* Tell all device drivers that they can resume operations */
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_set_channel_state(pe, pci_channel_io_normal);
+	eeh_set_irq_state(pe, true);
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
 	pr_info("EEH: Recovery successful.\n");
@@ -942,6 +957,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 
 	/* Notify all devices that they're about to go down. */
 	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+	eeh_set_irq_state(pe, false);
 	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
 	/* Mark the PE to be removed permanently */

From 665012c5734b0f2123dfb4b2bdd44c3344647b9a Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:39 +1000
Subject: [PATCH 202/251] powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER

If a device without a driver is recovered via EEH, the flag
EEH_DEV_NO_HANDLER is incorrectly left set on the device after
recovery, because the test in eeh_report_resume() for the existence of
a bound driver is done before the flag is cleared. If a driver is
later bound, and EEH experienced again, some of the drivers EEH
handers are not called.

To correct this, clear the flag unconditionally after EEH processing
is complete.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7b0670b03a971..e18802d966547 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -411,7 +411,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume ||
 	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
-		edev->mode &= ~EEH_DEV_NO_HANDLER;
 		goto out;
 	}
 
@@ -786,6 +785,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 {
 	struct pci_bus *bus;
 	struct eeh_dev *edev, *tmp;
+	struct eeh_pe *tmp_pe;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
 	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
@@ -940,6 +940,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	eeh_set_irq_state(pe, true);
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
+	eeh_for_each_pe(pe, tmp_pe)
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+			edev->mode &= ~EEH_DEV_NO_HANDLER;
+
 	pr_info("EEH: Recovery successful.\n");
 	goto final;
 

From 20b344971433da7bcd19265e5dc00a4d0df5e77e Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:40 +1000
Subject: [PATCH 203/251] powerpc/eeh: Refactor report functions

The EEH report functions now share a fair bit of code around the start
and end of each function.

So factor out as much as possible, and move the traversal into a
custom function. This also allows accurate debug to be generated more
easily.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
[mpe: Format with clang-format]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 307 ++++++++++++++++---------------
 1 file changed, 157 insertions(+), 150 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index e18802d966547..67619b4b3f96c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -60,6 +60,44 @@ static int eeh_result_priority(enum pci_ers_result result)
 	}
 };
 
+const char *pci_ers_result_name(enum pci_ers_result result)
+{
+	switch (result) {
+	case PCI_ERS_RESULT_NONE:
+		return "none";
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		return "can recover";
+	case PCI_ERS_RESULT_NEED_RESET:
+		return "need reset";
+	case PCI_ERS_RESULT_DISCONNECT:
+		return "disconnect";
+	case PCI_ERS_RESULT_RECOVERED:
+		return "recovered";
+	case PCI_ERS_RESULT_NO_AER_DRIVER:
+		return "no AER driver";
+	default:
+		WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
+		return "unknown";
+	}
+};
+
+static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
+					 const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
+	       edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
+
+	va_end(args);
+}
+
 static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
 						enum pci_ers_result new)
 {
@@ -235,123 +273,117 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
 	}
 }
 
-/**
- * eeh_report_error - Report pci error to each device driver
- * @data: eeh device
- * @userdata: return value
- *
- * Report an EEH error to each device driver, collect up and
- * merge the device driver responses. Cumulative response
- * passed back in "userdata".
- */
-static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
+typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+					     struct pci_driver *);
+static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
+			       enum pci_ers_result *result)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
+	enum pci_ers_result new_result;
+
+	device_lock(&edev->pdev->dev);
+	if (eeh_edev_actionable(edev)) {
+		driver = eeh_pcid_get(edev->pdev);
+
+		if (!driver)
+			eeh_edev_info(edev, "no driver");
+		else if (!driver->err_handler)
+			eeh_edev_info(edev, "driver not EEH aware");
+		else if (edev->mode & EEH_DEV_NO_HANDLER)
+			eeh_edev_info(edev, "driver bound too late");
+		else {
+			new_result = fn(edev, driver);
+			eeh_edev_info(edev, "%s driver reports: '%s'",
+				      driver->name,
+				      pci_ers_result_name(new_result));
+			if (result)
+				*result = pci_ers_merge_result(*result,
+							       new_result);
+		}
+		if (driver)
+			eeh_pcid_put(edev->pdev);
+	} else {
+		eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
+			      !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
+	}
+	device_unlock(&edev->pdev->dev);
+}
 
-	if (!eeh_edev_actionable(edev))
-		return NULL;
+static void eeh_pe_report(const char *name, struct eeh_pe *root,
+			  eeh_report_fn fn, enum pci_ers_result *result)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
 
-	device_lock(&dev->dev);
+	pr_info("EEH: Beginning: '%s'\n", name);
+	eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
+		eeh_pe_report_edev(edev, fn, result);
+	if (result)
+		pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
+			name, pci_ers_result_name(*result));
+	else
+		pr_info("EEH: Finished:'%s'", name);
+}
 
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
+/**
+ * eeh_report_error - Report pci error to each device driver
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * Report an EEH error to each device driver.
+ */
+static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+					    struct pci_driver *driver)
+{
+	enum pci_ers_result rc;
+	struct pci_dev *dev = edev->pdev;
 
-	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected)
-		goto out;
+	if (!driver->err_handler->error_detected)
+		return PCI_ERS_RESULT_NONE;
 
+	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
+		      driver->name);
 	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
-	*res = pci_ers_merge_result(*res, rc);
-
 	edev->in_error = true;
 	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
-
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	return rc;
 }
 
 /**
  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * Tells each device driver that IO ports, MMIO and config space I/O
- * are now enabled. Collects up and merges the device driver responses.
- * Cumulative response passed back in "userdata".
+ * are now enabled.
  */
-static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+						   struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->mmio_enabled ||
-	    (edev->mode & EEH_DEV_NO_HANDLER))
-		goto out;
-
-	rc = driver->err_handler->mmio_enabled(dev);
-
-	*res = pci_ers_merge_result(*res, rc);
-
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	if (!driver->err_handler->mmio_enabled)
+		return PCI_ERS_RESULT_NONE;
+	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
+	return driver->err_handler->mmio_enabled(edev->pdev);
 }
 
 /**
  * eeh_report_reset - Tell device that slot has been reset
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * This routine must be called while EEH tries to reset particular
  * PCI device so that the associated PCI device driver could take
  * some actions, usually to save data the driver needs so that the
  * driver can work again while the device is recovered.
  */
-static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+					    struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->slot_reset ||
-	    (edev->mode & EEH_DEV_NO_HANDLER) ||
-	    (!edev->in_error))
-		goto out;
-
-	rc = driver->err_handler->slot_reset(dev);
-	*res = pci_ers_merge_result(*res, rc);
-
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	if (!driver->err_handler->slot_reset || !edev->in_error)
+		return PCI_ERS_RESULT_NONE;
+	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
+	return driver->err_handler->slot_reset(edev->pdev);
 }
 
 static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
@@ -384,84 +416,53 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
 
 /**
  * eeh_report_resume - Tell device to resume normal operations
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * This routine must be called to notify the device driver that it
  * could resume so that the device driver can do some initialization
  * to make the recovered device work again.
  */
-static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+					     struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	bool was_in_error;
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
+	if (!driver->err_handler->resume || !edev->in_error)
+		return PCI_ERS_RESULT_NONE;
 
-	was_in_error = edev->in_error;
-	edev->in_error = false;
+	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
+	driver->err_handler->resume(edev->pdev);
 
-	if (!driver->err_handler ||
-	    !driver->err_handler->resume ||
-	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
-		goto out;
-	}
-
-	driver->err_handler->resume(dev);
-
-	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
-out:
-	eeh_pcid_put(dev);
+	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
 #ifdef CONFIG_PCI_IOV
 	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
 		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 #endif
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	return PCI_ERS_RESULT_NONE;
 }
 
 /**
  * eeh_report_failure - Tell device driver that device is dead.
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
  *
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
+static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+					      struct pci_driver *driver)
 {
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	struct pci_driver *driver;
-
-	if (!eeh_edev_actionable(edev))
-		return NULL;
-
-	device_lock(&dev->dev);
-	dev->error_state = pci_channel_io_perm_failure;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) goto out_no_dev;
+	enum pci_ers_result rc;
 
-	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected)
-		goto out;
+	if (!driver->err_handler->error_detected)
+		return PCI_ERS_RESULT_NONE;
 
-	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
+		      driver->name);
+	rc = driver->err_handler->error_detected(edev->pdev,
+						 pci_channel_io_perm_failure);
 
-	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
-out:
-	eeh_pcid_put(dev);
-out_no_dev:
-	device_unlock(&dev->dev);
-	return NULL;
+	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+	return rc;
 }
 
 static void *eeh_add_virt_device(void *data, void *userdata)
@@ -823,7 +824,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device drivers to shutdown\n");
 	eeh_set_channel_state(pe, pci_channel_io_frozen);
 	eeh_set_irq_state(pe, false);
-	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+	eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
+		      &result);
 	if ((pe->type & EEH_PE_PHB) &&
 	    result != PCI_ERS_RESULT_NONE &&
 	    result != PCI_ERS_RESULT_NEED_RESET)
@@ -870,7 +872,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 			result = PCI_ERS_RESULT_NEED_RESET;
 		} else {
 			pr_info("EEH: Notify device drivers to resume I/O\n");
-			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
+			eeh_pe_report("mmio_enabled", pe,
+				      eeh_report_mmio_enabled, &result);
 		}
 	}
 
@@ -915,7 +918,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		result = PCI_ERS_RESULT_NONE;
 		eeh_set_channel_state(pe, pci_channel_io_normal);
 		eeh_set_irq_state(pe, true);
-		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+		eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
 	}
 
 	/* All devices should claim they have recovered by now. */
@@ -938,11 +941,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_set_channel_state(pe, pci_channel_io_normal);
 	eeh_set_irq_state(pe, true);
-	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
-
-	eeh_for_each_pe(pe, tmp_pe)
-		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+	eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+	eeh_for_each_pe(pe, tmp_pe) {
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
 			edev->mode &= ~EEH_DEV_NO_HANDLER;
+			edev->in_error = false;
+		}
+	}
 
 	pr_info("EEH: Recovery successful.\n");
 	goto final;
@@ -962,7 +967,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	/* Notify all devices that they're about to go down. */
 	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 	eeh_set_irq_state(pe, false);
-	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
+	eeh_pe_report("error_detected(permanent failure)", pe,
+		      eeh_report_failure, NULL);
 
 	/* Mark the PE to be removed permanently */
 	eeh_pe_state_mark(pe, EEH_PE_REMOVED);
@@ -1072,7 +1078,8 @@ void eeh_handle_special_event(void)
 				/* Notify all devices to be down */
 				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 				eeh_set_channel_state(pe, pci_channel_io_perm_failure);
-				eeh_pe_dev_traverse(pe,
+				eeh_pe_report(
+					"error_detected(permanent failure)", pe,
 					eeh_report_failure, NULL);
 				bus = eeh_pe_bus_get(phb_pe);
 				if (!bus) {

From 6bcdd2972b9f6ebda9ae5c7075e2d59770dbbf12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 29 May 2018 22:57:38 +1000
Subject: [PATCH 204/251] powerpc/ptrace: Use copy_{from, to}_user() rather
 than open-coding

In PPC_PTRACE_GETHWDBGINFO and PPC_PTRACE_SETHWDEBUG we do an
access_ok() check and then __copy_{from,to}_user().

Instead we should just use copy_{from,to}_user() which does all that
for us and is less error prone.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/ptrace.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 0f63dd5972e9a..9667666eb18e5 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3082,27 +3082,19 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
-		if (!access_ok(VERIFY_WRITE, datavp,
-			       sizeof(struct ppc_debug_info)))
+		if (copy_to_user(datavp, &dbginfo,
+				 sizeof(struct ppc_debug_info)))
 			return -EFAULT;
-		ret = __copy_to_user(datavp, &dbginfo,
-				     sizeof(struct ppc_debug_info)) ?
-		      -EFAULT : 0;
-		break;
+		return 0;
 	}
 
 	case PPC_PTRACE_SETHWDEBUG: {
 		struct ppc_hw_breakpoint bp_info;
 
-		if (!access_ok(VERIFY_READ, datavp,
-			       sizeof(struct ppc_hw_breakpoint)))
+		if (copy_from_user(&bp_info, datavp,
+				   sizeof(struct ppc_hw_breakpoint)))
 			return -EFAULT;
-		ret = __copy_from_user(&bp_info, datavp,
-				       sizeof(struct ppc_hw_breakpoint)) ?
-		      -EFAULT : 0;
-		if (!ret)
-			ret = ppc_set_hwdebug(child, &bp_info);
-		break;
+		return ppc_set_hwdebug(child, &bp_info);
 	}
 
 	case PPC_PTRACE_DELHWDEBUG: {

From ba0635fcbe8c1ce83523c1ec79753868ce57f7a8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 14 May 2018 23:03:15 +1000
Subject: [PATCH 205/251] powerpc: Rename thread_struct.fs to addr_limit

It's called 'fs' for historical reasons, it's named after the x86 'FS'
register. But we don't have to use that name for the member of
thread_struct, and in fact arch/x86 doesn't even call it 'fs' anymore.

So rename it to 'addr_limit', which better reflects what it's used
for, and is also the name used on other arches.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/processor.h | 6 +++---
 arch/powerpc/include/asm/uaccess.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index eff269adfa71c..5debe337ea9d3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -249,7 +249,7 @@ struct thread_struct {
 	unsigned long	ksp_vsid;
 #endif
 	struct pt_regs	*regs;		/* Pointer to saved register state */
-	mm_segment_t	fs;		/* for get_fs() validation */
+	mm_segment_t	addr_limit;	/* for get_fs() validation */
 #ifdef CONFIG_BOOKE
 	/* BookE base exception scratch space; align on cacheline */
 	unsigned long	normsave[8] ____cacheline_aligned;
@@ -379,7 +379,7 @@ struct thread_struct {
 #define INIT_THREAD { \
 	.ksp = INIT_SP, \
 	.ksp_limit = INIT_SP_LIMIT, \
-	.fs = KERNEL_DS, \
+	.addr_limit = KERNEL_DS, \
 	.pgdir = swapper_pg_dir, \
 	.fpexc_mode = MSR_FE0 | MSR_FE1, \
 	SPEFSCR_INIT \
@@ -388,7 +388,7 @@ struct thread_struct {
 #define INIT_THREAD  { \
 	.ksp = INIT_SP, \
 	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
-	.fs = KERNEL_DS, \
+	.addr_limit = KERNEL_DS, \
 	.fpexc_mode = 0, \
 	.ppr = INIT_PPR, \
 	.fscr = FSCR_TAR | FSCR_EBB \
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a62ee663b2c83..a91cea15187bc 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -30,8 +30,8 @@
 #endif
 
 #define get_ds()	(KERNEL_DS)
-#define get_fs()	(current->thread.fs)
-#define set_fs(val)	(current->thread.fs = (val))
+#define get_fs()	(current->thread.addr_limit)
+#define set_fs(val)	(current->thread.addr_limit = (val))
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 

From 3e3786801b701cf03ee028fca786848d4865563e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 14 May 2018 23:03:16 +1000
Subject: [PATCH 206/251] powerpc: Check address limit on user-mode return
 (TIF_FSCHECK)

set_fs() sets the addr_limit, which is used in access_ok() to
determine if an address is a user or kernel address.

Some code paths use set_fs() to temporarily elevate the addr_limit so
that kernel code can read/write kernel memory as if it were user
memory. That is fine as long as the code can't ever return to
userspace with the addr_limit still elevated.

If that did happen, then userspace can read/write kernel memory as if
it were user memory, eg. just with write(2). In case it's not clear,
that is very bad. It has also happened in the past due to bugs.

Commit 5ea0727b163c ("x86/syscalls: Check address limit on user-mode
return") added a mechanism to check the addr_limit value before
returning to userspace. Any call to set_fs() sets a thread flag,
TIF_FSCHECK, and if we see that on the return to userspace we go out
of line to check that the addr_limit value is not elevated.

For further info see the above commit, as well as:
  https://lwn.net/Articles/722267/
  https://bugs.chromium.org/p/project-zero/issues/detail?id=990

Verified to work on 64-bit Book3S using a POC that objdumps the system
call handler, and a modified lkdtm_CORRUPT_USER_DS() that doesn't kill
the caller.

Before:
  $ sudo ./test-tif-fscheck
  ...
  0000000000000000 <.data>:
         0:       e1 f7 8a 79     rldicl. r10,r12,30,63
         4:       80 03 82 40     bne     0x384
         8:       00 40 8a 71     andi.   r10,r12,16384
         c:       78 0b 2a 7c     mr      r10,r1
        10:       10 fd 21 38     addi    r1,r1,-752
        14:       08 00 c2 41     beq-    0x1c
        18:       58 09 2d e8     ld      r1,2392(r13)
        1c:       00 00 41 f9     std     r10,0(r1)
        20:       70 01 61 f9     std     r11,368(r1)
        24:       78 01 81 f9     std     r12,376(r1)
        28:       70 00 01 f8     std     r0,112(r1)
        2c:       78 00 41 f9     std     r10,120(r1)
        30:       20 00 82 41     beq     0x50
        34:       a6 42 4c 7d     mftb    r10

After:

  $ sudo ./test-tif-fscheck
  Killed

And in dmesg:
  Invalid address limit on user-mode return
  WARNING: CPU: 1 PID: 3689 at ../include/linux/syscalls.h:260 do_notify_resume+0x140/0x170
  ...
  NIP [c00000000001ee50] do_notify_resume+0x140/0x170
  LR [c00000000001ee4c] do_notify_resume+0x13c/0x170
  Call Trace:
    do_notify_resume+0x13c/0x170 (unreliable)
    ret_from_except_lite+0x70/0x74

Performance overhead is essentially zero in the usual case, because
the bit is checked as part of the existing _TIF_USER_WORK_MASK check.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/thread_info.h | 8 +++++---
 arch/powerpc/include/asm/uaccess.h     | 8 +++++++-
 arch/powerpc/kernel/signal.c           | 4 ++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 5964145db03d1..f308dfeb27465 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -79,8 +79,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling
-					   TIF_NEED_RESCHED */
+#define TIF_FSCHECK		3	/* Check FS is USER_DS on return */
 #define TIF_32BIT		4	/* 32 bit binary */
 #define TIF_RESTORE_TM		5	/* need to restore TM FP/VEC/VSX */
 #define TIF_PATCH_PENDING	6	/* pending live patching update */
@@ -99,6 +98,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
 #if defined(CONFIG_PPC64)
 #define TIF_ELF2ABI		18	/* function descriptors must die! */
 #endif
+#define TIF_POLLING_NRFLAG	19	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -118,13 +118,15 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_EMULATE_STACK_STORE	(1<<TIF_EMULATE_STACK_STORE)
 #define _TIF_NOHZ		(1<<TIF_NOHZ)
+#define _TIF_FSCHECK		(1<<TIF_FSCHECK)
 #define _TIF_SYSCALL_DOTRACE	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
 				 _TIF_NOHZ)
 
 #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
-				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING)
+				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
+				 _TIF_FSCHECK)
 #define _TIF_PERSYSCALL_MASK	(_TIF_RESTOREALL|_TIF_NOERROR)
 
 /* Bits in local_flags */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a91cea15187bc..abba80f8ff04a 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -31,7 +31,13 @@
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.addr_limit)
-#define set_fs(val)	(current->thread.addr_limit = (val))
+
+static inline void set_fs(mm_segment_t fs)
+{
+	current->thread.addr_limit = fs;
+	/* On user-mode return check addr_limit (fs) is correct */
+	set_thread_flag(TIF_FSCHECK);
+}
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 61db86ecd318d..fb932f1202c71 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -15,6 +15,7 @@
 #include <linux/key.h>
 #include <linux/context_tracking.h>
 #include <linux/livepatch.h>
+#include <linux/syscalls.h>
 #include <asm/hw_breakpoint.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -150,6 +151,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 {
 	user_exit();
 
+	/* Check valid addr_limit, TIF check is done there */
+	addr_limit_user_check();
+
 	if (thread_info_flags & _TIF_UPROBE)
 		uprobe_notify_resume(regs);
 

From 7b08729cb272b4cd5c657cd5ac0dddae15a593ff Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 2 May 2018 23:07:26 +1000
Subject: [PATCH 207/251] powerpc/64: Save stack pointer when we hard disable
 interrupts

A CPU that gets stuck with interrupts hard disable can be difficult to
debug, as on some platforms we have no way to interrupt the CPU to
find out what it's doing.

A stop-gap is to have the CPU save it's stack pointer (r1) in its paca
when it hard disables interrupts. That way if we can't interrupt it,
we can at least trace the stack based on where it last disabled
interrupts.

In some cases that will be total junk, but the stack trace code should
handle that. In the simple case of a CPU that disable interrupts and
then gets stuck in a loop, the stack trace should be informative.

We could clear the saved stack pointer when we enable interrupts, but
that loses information which could be useful if we have nothing else
to go on.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/hw_irq.h    | 6 +++++-
 arch/powerpc/include/asm/paca.h      | 2 +-
 arch/powerpc/kernel/exceptions-64s.S | 1 +
 arch/powerpc/xmon/xmon.c             | 4 ++++
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 9aec7237f8c24..e151774cb5772 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -238,8 +238,12 @@ static inline bool arch_irqs_disabled(void)
 	__hard_irq_disable();						\
 	flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED);		\
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;			\
-	if (!arch_irqs_disabled_flags(flags))				\
+	if (!arch_irqs_disabled_flags(flags)) {				\
+		asm ("stdx %%r1, 0, %1 ;"				\
+		     : "=m" (local_paca->saved_r1)			\
+		     : "b" (&local_paca->saved_r1));			\
 		trace_hardirqs_off();					\
+	}								\
 } while(0)
 
 static inline bool lazy_irq_pending(void)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 81471bd08f66b..6d34bd71139da 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -161,7 +161,7 @@ struct paca_struct {
 	struct task_struct *__current;	/* Pointer to current */
 	u64 kstack;			/* Saved Kernel stack addr */
 	u64 stab_rr;			/* stab/slb round-robin counter */
-	u64 saved_r1;			/* r1 save for RTAS calls or PM */
+	u64 saved_r1;			/* r1 save for RTAS calls or PM or EE=0 */
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u16 trap_save;			/* Used when bad stack is encountered */
 	u8 irq_soft_mask;		/* mask for irq soft masking */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae6a849db60b1..bb26fe9e90ce5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1499,6 +1499,7 @@ masked_##_H##interrupt:					\
 	xori	r10,r10,MSR_EE; /* clear MSR_EE */	\
 	mtspr	SPRN_##_H##SRR1,r10;			\
 2:	mtcrf	0x80,r9;				\
+	std	r1,PACAR1(r13);				\
 	ld	r9,PACA_EXGEN+EX_R9(r13);		\
 	ld	r10,PACA_EXGEN+EX_R10(r13);		\
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0561c14b276b0..47166ad2a6691 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1172,6 +1172,10 @@ static int cpu_cmd(void)
 	/* try to switch to cpu specified */
 	if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		printf("cpu 0x%lx isn't in xmon\n", cpu);
+#ifdef CONFIG_PPC64
+		printf("backtrace of paca[0x%lx].saved_r1 (possibly stale):\n", cpu);
+		xmon_show_stack(paca_ptrs[cpu]->saved_r1, 0, 0);
+#endif
 		return 0;
 	}
 	xmon_taken = 0;

From 6ba55716a24f5f399ad4d37685e4bb721f8e6dd5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 2 May 2018 23:07:27 +1000
Subject: [PATCH 208/251] powerpc/nmi: Add an API for sending "safe" NMIs

Currently the options we have for sending NMIs are not necessarily
safe, that is they can potentially interrupt a CPU in a
non-recoverable region of code, meaning the kernel must then panic().

But we'd like to use smp_send_nmi_ipi() to do cross-CPU calls in
situations where we don't want to risk a panic(), because it doesn't
have the requirement that interrupts must be enabled like
smp_call_function().

So add an API for the caller to indicate that it wants to use the NMI
infrastructure, but doesn't want to do anything "unsafe".

Currently that is implemented by not actually calling cause_nmi_ipi(),
instead falling back to an IPI. In future we can pass the safe
parameter down to cause_nmi_ipi() and the individual backends can
potentially take it into account before deciding what to do.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/smp.h |  1 +
 arch/powerpc/kernel/smp.c      | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index cfecfee1194b9..29ffaabdf75b5 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -58,6 +58,7 @@ struct smp_ops_t {
 
 extern void smp_flush_nmi_ipi(u64 delay_us);
 extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
 extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
 extern void smp_generic_give_timebase(void);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index b009a562c76b2..5eadfffabe351 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -430,9 +430,9 @@ int smp_handle_nmi_ipi(struct pt_regs *regs)
 	return ret;
 }
 
-static void do_smp_send_nmi_ipi(int cpu)
+static void do_smp_send_nmi_ipi(int cpu, bool safe)
 {
-	if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
+	if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
 		return;
 
 	if (cpu >= 0) {
@@ -472,7 +472,7 @@ void smp_flush_nmi_ipi(u64 delay_us)
  * - delay_us > 0 is the delay before giving up waiting for targets to
  *   enter the handler, == 0 specifies indefinite delay.
  */
-int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
 {
 	unsigned long flags;
 	int me = raw_smp_processor_id();
@@ -505,7 +505,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 	nmi_ipi_busy_count++;
 	nmi_ipi_unlock();
 
-	do_smp_send_nmi_ipi(cpu);
+	do_smp_send_nmi_ipi(cpu, safe);
 
 	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
 		udelay(1);
@@ -527,6 +527,16 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 
 	return ret;
 }
+
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+	return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
+}
+
+int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+	return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
+}
 #endif /* CONFIG_NMI_IPI */
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
@@ -570,7 +580,7 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 			 * entire NMI dance and waiting for
 			 * cpus to clear pending mask, etc.
 			 */
-			do_smp_send_nmi_ipi(cpu);
+			do_smp_send_nmi_ipi(cpu, false);
 		}
 	}
 }

From 5cc05910f26e6fd6da15f052f86f6150e4b91664 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 2 May 2018 23:07:28 +1000
Subject: [PATCH 209/251] powerpc/64s: Wire up arch_trigger_cpumask_backtrace()

This allows eg. the RCU stall detector, or the soft/hardlockup
detectors to trigger a backtrace on all CPUs.

We implement this by sending a "safe" NMI, which will actually only
send an IPI. Unfortunately the generic code prints "NMI", so that's a
little confusing but we can probably live with it.

If one of the CPUs doesn't respond to the IPI, we then print some info
from it's paca and do a backtrace based on its saved_r1.

Example output:

  INFO: rcu_sched detected stalls on CPUs/tasks:
  	2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735
  	(detected by 4, t=58847 jiffies, g=58, c=57, q=1258)
  Sending NMI from CPU 4 to CPUs 2:
  CPU 2 didn't respond to backtrace IPI, inspecting paca.
  irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash)
  Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale):
  Call Trace:
  [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable)
  [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80
  [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0
  [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0
  [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0
  [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240
  [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110
  [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/nmi.h   |  6 ++++
 arch/powerpc/kernel/stacktrace.c | 51 ++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 9c80939b4d147..0f571e0ebca19 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -8,4 +8,10 @@ extern void arch_touch_nmi_watchdog(void);
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
 
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE)
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
 #endif /* _ASM_NMI_H */
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 8dd6ba0c7d353..b4f134e8bbd97 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/module.h>
+#include <linux/nmi.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
@@ -22,6 +23,8 @@
 #include <linux/ftrace.h>
 #include <asm/kprobes.h>
 
+#include <asm/paca.h>
+
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
  */
@@ -194,3 +197,51 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
 #endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+	nmi_cpu_backtrace(regs);
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu == smp_processor_id())
+			handle_backtrace_ipi(NULL);
+		else
+			smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
+	}
+
+	for_each_cpu(cpu, mask) {
+		struct paca_struct *p = paca_ptrs[cpu];
+
+		cpumask_clear_cpu(cpu, mask);
+
+		pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
+		if (!virt_addr_valid(p)) {
+			pr_warn("paca pointer appears corrupt? (%px)\n", p);
+			continue;
+		}
+
+		pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
+			p->irq_soft_mask, p->in_mce, p->in_nmi);
+
+		if (virt_addr_valid(p->__current))
+			pr_cont(" current: %d (%s)\n", p->__current->pid,
+				p->__current->comm);
+		else
+			pr_cont(" current pointer corrupt? (%px)\n", p->__current);
+
+		pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
+		show_stack(p->__current, (unsigned long *)p->saved_r1);
+	}
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+}
+#endif /* CONFIG_PPC64 */

From 7af76c5f23abc7afedf449e7d2960f463cbc4097 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 2 May 2018 23:07:29 +1000
Subject: [PATCH 210/251] powerpc/stacktrace: Update copyright

This now has new code in it written by Nick and I, and switch to a
SPDX tag.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/stacktrace.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index b4f134e8bbd97..07e97f289c520 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -1,13 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
- * Stack trace utility
+ * Stack trace utility functions etc.
  *
  * Copyright 2008 Christoph Hellwig, IBM Corp.
  * Copyright 2018 SUSE Linux GmbH
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
+ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
  */
 
 #include <linux/export.h>

From a6b3964ad71a61bb7c61d80a60bea7d42187b2eb Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 24 Apr 2018 14:15:54 +1000
Subject: [PATCH 211/251] powerpc/64s: Add barrier_nospec

A no-op form of ori (or immediate of 0 into r31 and the result stored
in r31) has been re-tasked as a speculation barrier. The instruction
only acts as a barrier on newer machines with appropriate firmware
support. On older CPUs it remains a harmless no-op.

Implement barrier_nospec using this instruction.

mpe: The semantics of the instruction are believed to be that it
prevents execution of subsequent instructions until preceding branches
have been fully resolved and are no longer executing speculatively.
There is no further documentation available at this time.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/barrier.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index c7c63959ba910..e582d2c880922 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -76,6 +76,21 @@ do {									\
 	___p1;								\
 })
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Prevent execution of subsequent instructions until preceding branches have
+ * been fully resolved and are no longer executing speculatively.
+ */
+#define barrier_nospec_asm ori 31,31,0
+
+// This also acts as a compiler barrier due to the memory clobber.
+#define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
+
+#else /* !CONFIG_PPC_BOOK3S_64 */
+#define barrier_nospec_asm
+#define barrier_nospec()
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* _ASM_POWERPC_BARRIER_H */

From 2eea7f067f495e33b8b116b35b5988ab2b8aec55 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 24 Apr 2018 14:15:55 +1000
Subject: [PATCH 212/251] powerpc/64s: Add support for ori barrier_nospec
 patching

Based on the RFI patching. This is required to be able to disable the
speculation barrier.

Only one barrier type is supported and it does nothing when the
firmware does not enable it. Also re-patching modules is not supported
So the only meaningful thing that can be done is patching out the
speculation barrier at boot when the user says it is not wanted.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/barrier.h        |  2 +-
 arch/powerpc/include/asm/feature-fixups.h |  9 ++++++++
 arch/powerpc/include/asm/setup.h          |  1 +
 arch/powerpc/kernel/security.c            |  9 ++++++++
 arch/powerpc/kernel/vmlinux.lds.S         |  7 ++++++
 arch/powerpc/lib/feature-fixups.c         | 27 +++++++++++++++++++++++
 6 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index e582d2c880922..f67b3f6e36beb 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -81,7 +81,7 @@ do {									\
  * Prevent execution of subsequent instructions until preceding branches have
  * been fully resolved and are no longer executing speculatively.
  */
-#define barrier_nospec_asm ori 31,31,0
+#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop
 
 // This also acts as a compiler barrier due to the memory clobber.
 #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 1e82eb3caabd1..86ac59e75f361 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -195,11 +195,20 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET 951b-952b;			\
 	.popsection;
 
+#define NOSPEC_BARRIER_FIXUP_SECTION			\
+953:							\
+	.pushsection __barrier_nospec_fixup,"a";	\
+	.align 2;					\
+954:							\
+	FTR_ENTRY_OFFSET 953b-954b;			\
+	.popsection;
+
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
 
 void apply_feature_fixups(void);
 void setup_feature_keys(void);
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 27fa52ed6d00d..afc7280cce3b1 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -52,6 +52,7 @@ enum l1d_flush_type {
 
 void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
+void do_barrier_nospec_fixups(bool enable);
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index bab5a27ea8056..b963eae0b0a0e 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -9,10 +9,19 @@
 #include <linux/seq_buf.h>
 
 #include <asm/security_features.h>
+#include <asm/setup.h>
 
 
 unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
+static bool barrier_nospec_enabled;
+
+static void enable_barrier_nospec(bool enable)
+{
+	barrier_nospec_enabled = enable;
+	do_barrier_nospec_fixups(enable);
+}
+
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	bool thread_priv;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index c8af90ff49f05..ff73f498568cb 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -139,6 +139,13 @@ SECTIONS
 		*(__rfi_flush_fixup)
 		__stop___rfi_flush_fixup = .;
 	}
+
+	. = ALIGN(8);
+	__spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) {
+		__start___barrier_nospec_fixup = .;
+		*(__barrier_nospec_fixup)
+		__stop___barrier_nospec_fixup = .;
+	}
 #endif
 
 	EXCEPTION_TABLE(0)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index f3e46d4edd72d..ae911dad9b165 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -162,6 +162,33 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
 		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
 						: "unknown");
 }
+
+void do_barrier_nospec_fixups(bool enable)
+{
+	unsigned int instr, *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___barrier_nospec_fixup),
+	end = PTRRELOC(&__stop___barrier_nospec_fixup);
+
+	instr = 0x60000000; /* nop */
+
+	if (enable) {
+		pr_info("barrier-nospec: using ORI speculation barrier\n");
+		instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+		patch_instruction(dest, instr);
+	}
+
+	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)

From 815069ca57c142eb71d27439bc27f41a433a67b3 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 24 Apr 2018 14:15:56 +1000
Subject: [PATCH 213/251] powerpc/64s: Patch barrier_nospec in modules

Note that unlike RFI which is patched only in kernel the nospec state
reflects settings at the time the module was loaded.

Iterating all modules and re-patching every time the settings change
is not implemented.

Based on lwsync patching.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/setup.h  |  7 +++++++
 arch/powerpc/kernel/module.c      |  6 ++++++
 arch/powerpc/kernel/security.c    |  2 +-
 arch/powerpc/lib/feature-fixups.c | 16 +++++++++++++---
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index afc7280cce3b1..a24c3c9053ccc 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -53,6 +53,13 @@ enum l1d_flush_type {
 void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 void do_barrier_nospec_fixups(bool enable);
+extern bool barrier_nospec_enabled;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_barrier_nospec_fixups_range(bool enable, void *start, void *end);
+#else
+static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { };
+#endif
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 3f7ba0f5bf29f..1b3c6835e7303 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -72,6 +72,12 @@ int module_finalize(const Elf_Ehdr *hdr,
 		do_feature_fixups(powerpc_firmware_features,
 				  (void *)sect->sh_addr,
 				  (void *)sect->sh_addr + sect->sh_size);
+
+	sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
+	if (sect != NULL)
+		do_barrier_nospec_fixups_range(barrier_nospec_enabled,
+				  (void *)sect->sh_addr,
+				  (void *)sect->sh_addr + sect->sh_size);
 #endif
 
 	sect = find_section(hdr, sechdrs, "__lwsync_fixup");
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index b963eae0b0a0e..39cc9eae8d7fe 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -14,7 +14,7 @@
 
 unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
-static bool barrier_nospec_enabled;
+bool barrier_nospec_enabled;
 
 static void enable_barrier_nospec(bool enable)
 {
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index ae911dad9b165..2b9173d09f24b 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -163,14 +163,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
 						: "unknown");
 }
 
-void do_barrier_nospec_fixups(bool enable)
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
 {
 	unsigned int instr, *dest;
 	long *start, *end;
 	int i;
 
-	start = PTRRELOC(&__start___barrier_nospec_fixup),
-	end = PTRRELOC(&__stop___barrier_nospec_fixup);
+	start = fixup_start;
+	end = fixup_end;
 
 	instr = 0x60000000; /* nop */
 
@@ -189,6 +189,16 @@ void do_barrier_nospec_fixups(bool enable)
 	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
 }
 
+void do_barrier_nospec_fixups(bool enable)
+{
+	void *start, *end;
+
+	start = PTRRELOC(&__start___barrier_nospec_fixup),
+	end = PTRRELOC(&__stop___barrier_nospec_fixup);
+
+	do_barrier_nospec_fixups_range(enable, start, end);
+}
+
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)

From cb3d6759a93c6d0aea1c10deb6d00e111c29c19c Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 24 Apr 2018 14:15:57 +1000
Subject: [PATCH 214/251] powerpc/64s: Enable barrier_nospec based on firmware
 settings

Check what firmware told us and enable/disable the barrier_nospec as
appropriate.

We err on the side of enabling the barrier, as it's no-op on older
systems, see the comment for more detail.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/setup.h       |  1 +
 arch/powerpc/kernel/security.c         | 60 ++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/setup.c |  1 +
 arch/powerpc/platforms/pseries/setup.c |  1 +
 4 files changed, 63 insertions(+)

diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index a24c3c9053ccc..8721fd004291d 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -52,6 +52,7 @@ enum l1d_flush_type {
 
 void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
+void setup_barrier_nospec(void);
 void do_barrier_nospec_fixups(bool enable);
 extern bool barrier_nospec_enabled;
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 39cc9eae8d7fe..06d5195f67296 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -8,6 +8,7 @@
 #include <linux/device.h>
 #include <linux/seq_buf.h>
 
+#include <asm/debugfs.h>
 #include <asm/security_features.h>
 #include <asm/setup.h>
 
@@ -22,6 +23,65 @@ static void enable_barrier_nospec(bool enable)
 	do_barrier_nospec_fixups(enable);
 }
 
+void setup_barrier_nospec(void)
+{
+	bool enable;
+
+	/*
+	 * It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well.
+	 * But there's a good reason not to. The two flags we check below are
+	 * both are enabled by default in the kernel, so if the hcall is not
+	 * functional they will be enabled.
+	 * On a system where the host firmware has been updated (so the ori
+	 * functions as a barrier), but on which the hypervisor (KVM/Qemu) has
+	 * not been updated, we would like to enable the barrier. Dropping the
+	 * check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is
+	 * we potentially enable the barrier on systems where the host firmware
+	 * is not updated, but that's harmless as it's a no-op.
+	 */
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
+
+	enable_barrier_nospec(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int barrier_nospec_set(void *data, u64 val)
+{
+	switch (val) {
+	case 0:
+	case 1:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!!val == !!barrier_nospec_enabled)
+		return 0;
+
+	enable_barrier_nospec(!!val);
+
+	return 0;
+}
+
+static int barrier_nospec_get(void *data, u64 *val)
+{
+	*val = barrier_nospec_enabled ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec,
+			barrier_nospec_get, barrier_nospec_set, "%llu\n");
+
+static __init int barrier_nospec_debugfs_init(void)
+{
+	debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL,
+			    &fops_barrier_nospec);
+	return 0;
+}
+device_initcall(barrier_nospec_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	bool thread_priv;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index fa63d3fff14c8..8d0958cc83a87 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -124,6 +124,7 @@ static void pnv_setup_rfi_flush(void)
 		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
 
 	setup_rfi_flush(type, enable);
+	setup_barrier_nospec();
 }
 
 static void __init pnv_setup_arch(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b55ad4286dc7f..63b1f0d10ef0f 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -534,6 +534,7 @@ void pseries_setup_rfi_flush(void)
 		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
 
 	setup_rfi_flush(types, enable);
+	setup_barrier_nospec();
 }
 
 #ifdef CONFIG_PCI_IOV

From ddf35cf3764b5a182b178105f57515b42e2634f8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Apr 2018 14:15:58 +1000
Subject: [PATCH 215/251] powerpc: Use barrier_nospec in copy_from_user()

Based on the x86 commit doing the same.

See commit 304ec1b05031 ("x86/uaccess: Use __uaccess_begin_nospec()
and uaccess_try_nospec") and b3bbfb3fb5d2 ("x86: Introduce
__uaccess_begin_nospec() and uaccess_try_nospec") for more detail.

In all cases we are ordering the load from the potentially
user-controlled pointer vs a previous branch based on an access_ok()
check or similar.

Base on a patch from Michal Suchanek.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/uaccess.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index abba80f8ff04a..468653ce844ce 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -258,6 +258,7 @@ do {								\
 	__chk_user_ptr(ptr);					\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
+	barrier_nospec();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
@@ -269,8 +270,10 @@ do {								\
 	unsigned long  __gu_val = 0;					\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	might_fault();							\
-	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
+	if (access_ok(VERIFY_READ, __gu_addr, (size))) {		\
+		barrier_nospec();					\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
+	}								\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
@@ -281,6 +284,7 @@ do {								\
 	unsigned long __gu_val;					\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
+	barrier_nospec();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
@@ -308,15 +312,19 @@ static inline unsigned long raw_copy_from_user(void *to,
 
 		switch (n) {
 		case 1:
+			barrier_nospec();
 			__get_user_size(*(u8 *)to, from, 1, ret);
 			break;
 		case 2:
+			barrier_nospec();
 			__get_user_size(*(u16 *)to, from, 2, ret);
 			break;
 		case 4:
+			barrier_nospec();
 			__get_user_size(*(u32 *)to, from, 4, ret);
 			break;
 		case 8:
+			barrier_nospec();
 			__get_user_size(*(u64 *)to, from, 8, ret);
 			break;
 		}
@@ -324,6 +332,7 @@ static inline unsigned long raw_copy_from_user(void *to,
 			return 0;
 	}
 
+	barrier_nospec();
 	return __copy_tofrom_user((__force void __user *)to, from, n);
 }
 

From 51973a815c6b46d7b23b68d6af371ad1c9d503ca Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Apr 2018 14:15:59 +1000
Subject: [PATCH 216/251] powerpc/64: Use barrier_nospec in syscall entry

Our syscall entry is done in assembly so patch in an explicit
barrier_nospec.

Based on a patch by Michal Suchanek.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_64.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3d1af55e09dc5..b10e010212141 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/context_tracking.h>
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
+#include <asm/barrier.h>
 #include <asm/export.h>
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/exception-64s.h>
@@ -178,6 +179,15 @@ system_call:			/* label this so stack traces look sane */
 	clrldi	r8,r8,32
 15:
 	slwi	r0,r0,4
+
+	barrier_nospec_asm
+	/*
+	 * Prevent the load of the handler below (based on the user-passed
+	 * system call number) being speculatively executed until the test
+	 * against NR_syscalls and branch to .Lsyscall_enosys above has
+	 * committed.
+	 */
+
 	ldx	r12,r11,r0	/* Fetch system call handler [ptr] */
 	mtctr   r12
 	bctrl			/* Call handler */

From a377514519b9a20fa1ea9adddbb4129573129cef Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Mon, 28 May 2018 15:19:14 +0200
Subject: [PATCH 217/251] powerpc/64s: Enhance the information in
 cpu_show_spectre_v1()

We now have barrier_nospec as mitigation so print it in
cpu_show_spectre_v1() when enabled.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/security.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 06d5195f67296..3eb9c45f28d72 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -120,6 +120,9 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, c
 	if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
 		return sprintf(buf, "Not affected\n");
 
+	if (barrier_nospec_enabled)
+		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
 	return sprintf(buf, "Vulnerable\n");
 }
 

From 9c2d72d497a32788bf90f05610319a217258129a Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 29 May 2018 09:22:38 +1000
Subject: [PATCH 218/251] selftests/powerpc: Add perf breakpoint test

This tests perf hardware breakpoints (ie PERF_TYPE_BREAKPOINT) on
powerpc.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../selftests/powerpc/ptrace/.gitignore       |   1 +
 .../testing/selftests/powerpc/ptrace/Makefile |   3 +-
 .../selftests/powerpc/ptrace/perf-hwbreak.c   | 195 ++++++++++++++++++
 3 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c

diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 9dcc16ea8179c..07ec449a27675 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -9,3 +9,4 @@ ptrace-tm-vsx
 ptrace-tm-spd-vsx
 ptrace-tm-spr
 ptrace-hwbreak
+perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 4f59575389087..28f5b781a553f 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
-              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey
+              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
+              perf-hwbreak
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
new file mode 100644
index 0000000000000..60df0b5e628ae
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -0,0 +1,195 @@
+/*
+ * perf events self profiling example test case for hw breakpoints.
+ *
+ * This tests perf PERF_TYPE_BREAKPOINT parameters
+ * 1) tests all variants of the break on read/write flags
+ * 2) tests exclude_user == 0 and 1
+ * 3) test array matches (if DAWR is supported))
+ * 4) test different numbers of breakpoints matches
+ *
+ * Configure this breakpoint, then read and write the data a number of
+ * times. Then check the output count from perf is as expected.
+ *
+ * Based on:
+ *   http://ozlabs.org/~anton/junkcode/perf_events_example1.c
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <elf.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include "utils.h"
+
+#define MAX_LOOPS 10000
+
+#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
+
+static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+				      int cpu, int group_fd,
+				      unsigned long flags)
+{
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static inline bool breakpoint_test(int len)
+{
+	struct perf_event_attr attr;
+	int fd;
+
+	/* setup counters */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.bp_type = HW_BREAKPOINT_R;
+	/* bp_addr can point anywhere but needs to be aligned */
+	attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
+	attr.bp_len = len;
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (fd < 0)
+		return false;
+	close(fd);
+	return true;
+}
+
+static inline bool perf_breakpoint_supported(void)
+{
+	return breakpoint_test(4);
+}
+
+static inline bool dawr_supported(void)
+{
+	return breakpoint_test(DAWR_LENGTH_MAX);
+}
+
+static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
+{
+	int i,j;
+	struct perf_event_attr attr;
+	size_t res;
+	unsigned long long breaks, needed;
+	int readint;
+	int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)];
+	int *readintalign;
+	volatile int *ptr;
+	int break_fd;
+	int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
+	volatile int *k;
+
+	/* align to 0x400 boundary as required by DAWR */
+	readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
+			       0xfffffffffffff800);
+
+	ptr = &readint;
+	if (arraytest)
+		ptr = &readintalign[0];
+
+	/* setup counters */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.bp_type = readwriteflag;
+	attr.bp_addr = (__u64)ptr;
+	attr.bp_len = sizeof(int);
+	if (arraytest)
+		attr.bp_len = DAWR_LENGTH_MAX;
+	attr.exclude_user = exclude_user;
+	break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (break_fd < 0) {
+		perror("sys_perf_event_open");
+		exit(1);
+	}
+
+	/* start counters */
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+
+	/* Test a bunch of reads and writes */
+	k = &readint;
+	for (i = 0; i < loop_num; i++) {
+		if (arraytest)
+			k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]);
+
+		j = *k;
+		*k = j;
+	}
+
+	/* stop counters */
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+
+	/* read and check counters */
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	/* we read and write each loop, so subtract the ones we are counting */
+	needed = 0;
+	if (readwriteflag & HW_BREAKPOINT_R)
+		needed += loop_num;
+	if (readwriteflag & HW_BREAKPOINT_W)
+		needed += loop_num;
+	needed = needed * (1 - exclude_user);
+	printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n",
+	       (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest);
+	if (breaks != needed) {
+		printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n",
+		       (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user);
+		return 1;
+	}
+	close(break_fd);
+
+	return 0;
+}
+
+static int runtest(void)
+{
+	int rwflag;
+	int exclude_user;
+	int ret;
+
+	/*
+	 * perf defines rwflag as two bits read and write and at least
+	 * one must be set.  So range 1-3.
+	 */
+	for (rwflag = 1 ; rwflag < 4; rwflag++) {
+		for (exclude_user = 0 ; exclude_user < 2; exclude_user++) {
+			ret = runtestsingle(rwflag, exclude_user, 0);
+			if (ret)
+				return ret;
+
+			/* if we have the dawr, we can do an array test */
+			if (!dawr_supported())
+				continue;
+			ret = runtestsingle(rwflag, exclude_user, 1);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+
+static int perf_hwbreak(void)
+{
+	srand ( time(NULL) );
+
+	SKIP_IF(!perf_breakpoint_supported());
+
+	return runtest();
+}
+
+int main(int argc, char *argv[], char **envp)
+{
+	return test_harness(perf_hwbreak, "perf_hwbreak");
+}

From e6684d07e4308430b9b6497265781a6fb9fd87a0 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Mon, 21 May 2018 09:51:06 +0530
Subject: [PATCH 219/251] powerpc/sstep: Introduce GETTYPE macro

Replace 'op->type & INSTR_TYPE_MASK' expression with GETTYPE(op->type)
macro.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/sstep.h | 2 ++
 arch/powerpc/kernel/align.c      | 2 +-
 arch/powerpc/lib/sstep.c         | 6 +++---
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index ab9d849644d05..4547891a684be 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -97,6 +97,8 @@ enum instruction_type {
 #define SIZE(n)		((n) << 12)
 #define GETSIZE(w)	((w) >> 12)
 
+#define GETTYPE(t)	((t) & INSTR_TYPE_MASK)
+
 #define MKOP(t, f, s)	((t) | (f) | SIZE(s))
 
 struct instruction_op {
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 3e6c0744c174a..11550a3d1ac2e 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -339,7 +339,7 @@ int fix_alignment(struct pt_regs *regs)
 	if (r < 0)
 		return -EINVAL;
 
-	type = op.type & INSTR_TYPE_MASK;
+	type = GETTYPE(op.type);
 	if (!OP_IS_LOAD_STORE(type)) {
 		if (op.type != CACHEOP + DCBZ)
 			return -EINVAL;
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 49427a3ee1045..f18d70449255b 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -2642,7 +2642,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
 	unsigned long next_pc;
 
 	next_pc = truncate_if_32bit(regs->msr, regs->nip + 4);
-	switch (op->type & INSTR_TYPE_MASK) {
+	switch (GETTYPE(op->type)) {
 	case COMPUTE:
 		if (op->type & SETREG)
 			regs->gpr[op->reg] = op->val;
@@ -2740,7 +2740,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
 
 	err = 0;
 	size = GETSIZE(op->type);
-	type = op->type & INSTR_TYPE_MASK;
+	type = GETTYPE(op->type);
 	cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
 	ea = truncate_if_32bit(regs->msr, op->ea);
 
@@ -3002,7 +3002,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 	}
 
 	err = 0;
-	type = op.type & INSTR_TYPE_MASK;
+	type = GETTYPE(op.type);
 
 	if (OP_IS_LOAD_STORE(type)) {
 		err = emulate_loadstore(regs, &op);

From 83afab4ce05660c728f4a5d45f5970d9cc9ee6d2 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Mon, 21 May 2018 09:51:07 +0530
Subject: [PATCH 220/251] powerpc/sstep: Fix kernel crash if VSX is not present

emulate_step() is not checking runtime VSX feature flag before
emulating an instruction. This is causing kernel crash when kernel
is compiled with CONFIG_VSX=y but running on a machine where VSX
is not supported or disabled. Ex, while running emulate_step tests
on P6 machine:

  Oops: Exception in kernel mode, sig: 4 [#1]
  NIP [c000000000095c24] .load_vsrn+0x28/0x54
  LR [c000000000094bdc] .emulate_loadstore+0x167c/0x17b0
  Call Trace:
    0x40fe240c7ae147ae (unreliable)
    .emulate_loadstore+0x167c/0x17b0
    .emulate_step+0x25c/0x5bc
    .test_lxvd2x_stxvd2x+0x64/0x154
    .test_emulate_step+0x38/0x4c
    .do_one_initcall+0x5c/0x2c0
    .kernel_init_freeable+0x314/0x4cc
    .kernel_init+0x24/0x160
    .ret_from_kernel_thread+0x58/0xb4

With fix:
  emulate_step_test: lxvd2x         : FAIL
  emulate_step_test: stxvd2x        : FAIL

Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index f18d70449255b..d81568f783e5c 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -2545,6 +2545,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 #endif /* __powerpc64__ */
 
 	}
+
+#ifdef CONFIG_VSX
+	if ((GETTYPE(op->type) == LOAD_VSX ||
+	     GETTYPE(op->type) == STORE_VSX) &&
+	    !cpu_has_feature(CPU_FTR_VSX)) {
+		return -1;
+	}
+#endif /* CONFIG_VSX */
+
 	return 0;
 
  logical_done:

From 5a61640e322693028a36452f637681d388e4c241 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Mon, 21 May 2018 09:51:08 +0530
Subject: [PATCH 221/251] powerpc/sstep: Fix emulate_step test if VSX not
 present

emulate_step() tests are failing if VSX is not supported or disabled.

  emulate_step_test: lxvd2x         : FAIL
  emulate_step_test: stxvd2x        : FAIL

If !CPU_FTR_VSX, emulate_step() failure is expected and testcase should
PASS with a valid justification. After patch:

  emulate_step_test: lxvd2x         : PASS (!CPU_FTR_VSX)
  emulate_step_test: stxvd2x        : PASS (!CPU_FTR_VSX)

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/test_emulate_step.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 2534c14475546..6c47daa616149 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -387,10 +387,14 @@ static void __init test_lxvd2x_stxvd2x(void)
 	/* lxvd2x vsr39, r3, r4 */
 	stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
 
-	if (stepped == 1)
+	if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
 		show_result("lxvd2x", "PASS");
-	else
-		show_result("lxvd2x", "FAIL");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))
+			show_result("lxvd2x", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("lxvd2x", "FAIL");
+	}
 
 
 	/*** stxvd2x ***/
@@ -404,10 +408,15 @@ static void __init test_lxvd2x_stxvd2x(void)
 	stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
 
 	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
-	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3] &&
+	    cpu_has_feature(CPU_FTR_VSX)) {
 		show_result("stxvd2x", "PASS");
-	else
-		show_result("stxvd2x", "FAIL");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))
+			show_result("stxvd2x", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("stxvd2x", "FAIL");
+	}
 }
 #else
 static void __init test_lxvd2x_stxvd2x(void)

From 9887334b804892f10262fa7f805998d554e04367 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 28 Feb 2018 19:21:45 +0100
Subject: [PATCH 222/251] powerpc/dma: remove unnecessary BUG()

Direction is already checked in all calling functions in
include/linux/dma-mapping.h and also in called function __dma_sync()

So really no need to check it once more here.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/dma.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index da20569de9d43..e1cd6e979348c 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -235,8 +235,6 @@ static inline dma_addr_t dma_nommu_map_page(struct device *dev,
 					     enum dma_data_direction dir,
 					     unsigned long attrs)
 {
-	BUG_ON(dir == DMA_NONE);
-
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		__dma_sync_page(page, offset, size, dir);
 

From 8820a44738308a8a1644c6c3d04b477624db2d6b Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 21 Mar 2018 15:07:47 +0100
Subject: [PATCH 223/251] powerpc/mm: constify FIRST_CONTEXT in
 mmu_context_nohash

First context is now 1 for all supported platforms, so it
can be made a constant.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_nohash.c | 37 ++++++++++++++--------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index be8f5c9d4d088..5051c9363f8cb 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -54,7 +54,9 @@
 
 #include "mmu_decl.h"
 
-static unsigned int first_context, last_context;
+#define FIRST_CONTEXT 1
+
+static unsigned int last_context;
 static unsigned int next_context, nr_free_contexts;
 static unsigned long *context_map;
 static unsigned long *stale_map[NR_CPUS];
@@ -87,7 +89,7 @@ static unsigned int steal_context_smp(unsigned int id)
 	struct mm_struct *mm;
 	unsigned int cpu, max, i;
 
-	max = last_context - first_context;
+	max = last_context - FIRST_CONTEXT;
 
 	/* Attempt to free next_context first and then loop until we manage */
 	while (max--) {
@@ -100,7 +102,7 @@ static unsigned int steal_context_smp(unsigned int id)
 		if (mm->context.active) {
 			id++;
 			if (id > last_context)
-				id = first_context;
+				id = FIRST_CONTEXT;
 			continue;
 		}
 		pr_hardcont(" | steal %d from 0x%p", id, mm);
@@ -142,7 +144,7 @@ static unsigned int steal_all_contexts(void)
 	int cpu = smp_processor_id();
 	unsigned int id;
 
-	for (id = first_context; id <= last_context; id++) {
+	for (id = FIRST_CONTEXT; id <= last_context; id++) {
 		/* Pick up the victim mm */
 		mm = context_mm[id];
 
@@ -150,7 +152,7 @@ static unsigned int steal_all_contexts(void)
 
 		/* Mark this mm as having no context anymore */
 		mm->context.id = MMU_NO_CONTEXT;
-		if (id != first_context) {
+		if (id != FIRST_CONTEXT) {
 			context_mm[id] = NULL;
 			__clear_bit(id, context_map);
 #ifdef DEBUG_MAP_CONSISTENCY
@@ -163,9 +165,9 @@ static unsigned int steal_all_contexts(void)
 	/* Flush the TLB for all contexts (not to be used on SMP) */
 	_tlbil_all();
 
-	nr_free_contexts = last_context - first_context;
+	nr_free_contexts = last_context - FIRST_CONTEXT;
 
-	return first_context;
+	return FIRST_CONTEXT;
 }
 
 /* Note that this will also be called on SMP if all other CPUs are
@@ -201,7 +203,7 @@ static void context_check_map(void)
 	unsigned int id, nrf, nact;
 
 	nrf = nact = 0;
-	for (id = first_context; id <= last_context; id++) {
+	for (id = FIRST_CONTEXT; id <= last_context; id++) {
 		int used = test_bit(id, context_map);
 		if (!used)
 			nrf++;
@@ -219,7 +221,7 @@ static void context_check_map(void)
 	if (nact > num_online_cpus())
 		pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
 		       nact, num_online_cpus());
-	if (first_context > 0 && !test_bit(0, context_map))
+	if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
 		pr_err("MMU: Context 0 has been freed !!!\n");
 }
 #else
@@ -264,7 +266,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 	/* We really don't have a context, let's try to acquire one */
 	id = next_context;
 	if (id > last_context)
-		id = first_context;
+		id = FIRST_CONTEXT;
 	map = context_map;
 
 	/* No more free contexts, let's try to steal one */
@@ -289,7 +291,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 	while (__test_and_set_bit(id, map)) {
 		id = find_next_zero_bit(map, last_context+1, id);
 		if (id > last_context)
-			id = first_context;
+			id = FIRST_CONTEXT;
 	}
  stolen:
 	next_context = id + 1;
@@ -439,15 +441,12 @@ void __init mmu_context_init(void)
 	 *      -- BenH
 	 */
 	if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
-		first_context = 1;
 		last_context = 16;
 		no_selective_tlbil = true;
 	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
-		first_context = 1;
 		last_context = 65535;
 		no_selective_tlbil = false;
 	} else {
-		first_context = 1;
 		last_context = 255;
 		no_selective_tlbil = false;
 	}
@@ -473,16 +472,16 @@ void __init mmu_context_init(void)
 	printk(KERN_INFO
 	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
 	       2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
-	       last_context - first_context + 1);
+	       last_context - FIRST_CONTEXT + 1);
 
 	/*
 	 * Some processors have too few contexts to reserve one for
 	 * init_mm, and require using context 0 for a normal task.
 	 * Other processors reserve the use of context zero for the kernel.
-	 * This code assumes first_context < 32.
+	 * This code assumes FIRST_CONTEXT < 32.
 	 */
-	context_map[0] = (1 << first_context) - 1;
-	next_context = first_context;
-	nr_free_contexts = last_context - first_context + 1;
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_context = FIRST_CONTEXT;
+	nr_free_contexts = last_context - FIRST_CONTEXT + 1;
 }
 

From 48bdcace599b8e6241735a40fdb418a8471db089 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 21 Mar 2018 15:07:49 +0100
Subject: [PATCH 224/251] powerpc/mm: Avoid unnecessary test and reduce code
 size

no_selective_tlbil hence the use of either steal_all_contexts()
or steal_context_up() depends on the subarch, it won't change
during run. Only the 8xx uses steal_all_contexts and CONFIG_PPC_8xx
is exclusive of other processors.

This patch replaces the test of no_selective_tlbil global var by
a test of CONFIG_PPC_8xx selection. It avoids the test and
removes unnecessary code.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_nohash.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 5051c9363f8cb..3fbe36266838b 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -62,7 +62,6 @@ static unsigned long *context_map;
 static unsigned long *stale_map[NR_CPUS];
 static struct mm_struct **context_mm;
 static DEFINE_RAW_SPINLOCK(context_lock);
-static bool no_selective_tlbil;
 
 #define CTX_MAP_SIZE	\
 	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
@@ -279,7 +278,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 			goto stolen;
 		}
 #endif /* CONFIG_SMP */
-		if (no_selective_tlbil)
+		if (IS_ENABLED(CONFIG_PPC_8xx))
 			id = steal_all_contexts();
 		else
 			id = steal_context_up(id);
@@ -440,16 +439,12 @@ void __init mmu_context_init(void)
 	 * present if needed.
 	 *      -- BenH
 	 */
-	if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
+	if (mmu_has_feature(MMU_FTR_TYPE_8xx))
 		last_context = 16;
-		no_selective_tlbil = true;
-	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
+	else if (mmu_has_feature(MMU_FTR_TYPE_47x))
 		last_context = 65535;
-		no_selective_tlbil = false;
-	} else {
+	else
 		last_context = 255;
-		no_selective_tlbil = false;
-	}
 
 #ifdef DEBUG_CLAMP_LAST_CONTEXT
 	last_context = DEBUG_CLAMP_LAST_CONTEXT;

From 9e4f02e24e106449a5efc964b74a195edb230a57 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 21 Mar 2018 15:07:51 +0100
Subject: [PATCH 225/251] powerpc/mm: constify LAST_CONTEXT in
 mmu_context_nohash

last_context is 16 on the 8xx, 65535 on the 47x and 255 on other ones.

The kernel is exclusively built for the 8xx, for the 47x or for
another processor so the last context can be defined as a constant
depending on the processor.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Reformat old comment]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_nohash.c | 84 +++++++++++++---------------
 1 file changed, 39 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 3fbe36266838b..3cc59697d2533 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -54,9 +54,34 @@
 
 #include "mmu_decl.h"
 
+/*
+ * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
+ * A better way would be to keep track of tasks that own contexts, and implement
+ * an LRU usage. That way very active tasks don't always have to pay the TLB
+ * reload overhead. The kernel pages are mapped shared, so the kernel can run on
+ * behalf of any task that makes a kernel entry. Shared does not mean they are
+ * not protected, just that the ASID comparison is not performed. -- Dan
+ *
+ * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
+ * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
+ * is disabled, so we can use a TID of zero to represent all kernel pages as
+ * shared among all contexts. -- Dan
+ *
+ * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
+ * normally never have to steal though the facility is present if needed.
+ * -- BenH
+ */
 #define FIRST_CONTEXT 1
+#ifdef DEBUG_CLAMP_LAST_CONTEXT
+#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
+#elif defined(CONFIG_PPC_8xx)
+#define LAST_CONTEXT 16
+#elif defined(CONFIG_PPC_47x)
+#define LAST_CONTEXT 65535
+#else
+#define LAST_CONTEXT 255
+#endif
 
-static unsigned int last_context;
 static unsigned int next_context, nr_free_contexts;
 static unsigned long *context_map;
 static unsigned long *stale_map[NR_CPUS];
@@ -64,7 +89,7 @@ static struct mm_struct **context_mm;
 static DEFINE_RAW_SPINLOCK(context_lock);
 
 #define CTX_MAP_SIZE	\
-	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
+	(sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
 
 
 /* Steal a context from a task that has one at the moment.
@@ -88,7 +113,7 @@ static unsigned int steal_context_smp(unsigned int id)
 	struct mm_struct *mm;
 	unsigned int cpu, max, i;
 
-	max = last_context - FIRST_CONTEXT;
+	max = LAST_CONTEXT - FIRST_CONTEXT;
 
 	/* Attempt to free next_context first and then loop until we manage */
 	while (max--) {
@@ -100,7 +125,7 @@ static unsigned int steal_context_smp(unsigned int id)
 		 */
 		if (mm->context.active) {
 			id++;
-			if (id > last_context)
+			if (id > LAST_CONTEXT)
 				id = FIRST_CONTEXT;
 			continue;
 		}
@@ -143,7 +168,7 @@ static unsigned int steal_all_contexts(void)
 	int cpu = smp_processor_id();
 	unsigned int id;
 
-	for (id = FIRST_CONTEXT; id <= last_context; id++) {
+	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
 		/* Pick up the victim mm */
 		mm = context_mm[id];
 
@@ -164,7 +189,7 @@ static unsigned int steal_all_contexts(void)
 	/* Flush the TLB for all contexts (not to be used on SMP) */
 	_tlbil_all();
 
-	nr_free_contexts = last_context - FIRST_CONTEXT;
+	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
 
 	return FIRST_CONTEXT;
 }
@@ -202,7 +227,7 @@ static void context_check_map(void)
 	unsigned int id, nrf, nact;
 
 	nrf = nact = 0;
-	for (id = FIRST_CONTEXT; id <= last_context; id++) {
+	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
 		int used = test_bit(id, context_map);
 		if (!used)
 			nrf++;
@@ -264,7 +289,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 
 	/* We really don't have a context, let's try to acquire one */
 	id = next_context;
-	if (id > last_context)
+	if (id > LAST_CONTEXT)
 		id = FIRST_CONTEXT;
 	map = context_map;
 
@@ -288,8 +313,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 
 	/* We know there's at least one free context, try to find it */
 	while (__test_and_set_bit(id, map)) {
-		id = find_next_zero_bit(map, last_context+1, id);
-		if (id > last_context)
+		id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
+		if (id > LAST_CONTEXT)
 			id = FIRST_CONTEXT;
 	}
  stolen:
@@ -418,42 +443,11 @@ void __init mmu_context_init(void)
 	 */
 	init_mm.context.active = NR_CPUS;
 
-	/*
-	 *   The MPC8xx has only 16 contexts.  We rotate through them on each
-	 * task switch.  A better way would be to keep track of tasks that
-	 * own contexts, and implement an LRU usage.  That way very active
-	 * tasks don't always have to pay the TLB reload overhead.  The
-	 * kernel pages are mapped shared, so the kernel can run on behalf
-	 * of any task that makes a kernel entry.  Shared does not mean they
-	 * are not protected, just that the ASID comparison is not performed.
-	 *      -- Dan
-	 *
-	 * The IBM4xx has 256 contexts, so we can just rotate through these
-	 * as a way of "switching" contexts.  If the TID of the TLB is zero,
-	 * the PID/TID comparison is disabled, so we can use a TID of zero
-	 * to represent all kernel pages as shared among all contexts.
-	 * 	-- Dan
-	 *
-	 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We
-	 * should normally never have to steal though the facility is
-	 * present if needed.
-	 *      -- BenH
-	 */
-	if (mmu_has_feature(MMU_FTR_TYPE_8xx))
-		last_context = 16;
-	else if (mmu_has_feature(MMU_FTR_TYPE_47x))
-		last_context = 65535;
-	else
-		last_context = 255;
-
-#ifdef DEBUG_CLAMP_LAST_CONTEXT
-	last_context = DEBUG_CLAMP_LAST_CONTEXT;
-#endif
 	/*
 	 * Allocate the maps used by context management
 	 */
 	context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0);
-	context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0);
+	context_mm = memblock_virt_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0);
 #ifndef CONFIG_SMP
 	stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
 #else
@@ -466,8 +460,8 @@ void __init mmu_context_init(void)
 
 	printk(KERN_INFO
 	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
-	       2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
-	       last_context - FIRST_CONTEXT + 1);
+	       2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
+	       LAST_CONTEXT - FIRST_CONTEXT + 1);
 
 	/*
 	 * Some processors have too few contexts to reserve one for
@@ -477,6 +471,6 @@ void __init mmu_context_init(void)
 	 */
 	context_map[0] = (1 << FIRST_CONTEXT) - 1;
 	next_context = FIRST_CONTEXT;
-	nr_free_contexts = last_context - FIRST_CONTEXT + 1;
+	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
 }
 

From c865c955878eb56d4f37d7ab82438b68fbac4201 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 21 Mar 2018 15:07:53 +0100
Subject: [PATCH 226/251] powerpc/mm: Remove stale_map[] handling on non SMP
 processors

stale_map[] bits are only set in steal_context_smp() so
on UP processors this map is useless. Only manage it for SMP
processors.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_nohash.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 3cc59697d2533..4d80239ef83c3 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -84,7 +84,9 @@
 
 static unsigned int next_context, nr_free_contexts;
 static unsigned long *context_map;
+#ifdef CONFIG_SMP
 static unsigned long *stale_map[NR_CPUS];
+#endif
 static struct mm_struct **context_mm;
 static DEFINE_RAW_SPINLOCK(context_lock);
 
@@ -165,7 +167,9 @@ static unsigned int steal_context_smp(unsigned int id)
 static unsigned int steal_all_contexts(void)
 {
 	struct mm_struct *mm;
+#ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
+#endif
 	unsigned int id;
 
 	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
@@ -183,7 +187,9 @@ static unsigned int steal_all_contexts(void)
 			mm->context.active = 0;
 #endif
 		}
+#ifdef CONFIG_SMP
 		__clear_bit(id, stale_map[cpu]);
+#endif
 	}
 
 	/* Flush the TLB for all contexts (not to be used on SMP) */
@@ -202,7 +208,9 @@ static unsigned int steal_all_contexts(void)
 static unsigned int steal_context_up(unsigned int id)
 {
 	struct mm_struct *mm;
+#ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
+#endif
 
 	/* Pick up the victim mm */
 	mm = context_mm[id];
@@ -216,7 +224,9 @@ static unsigned int steal_context_up(unsigned int id)
 	mm->context.id = MMU_NO_CONTEXT;
 
 	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
+#ifdef CONFIG_SMP
 	__clear_bit(id, stale_map[cpu]);
+#endif
 
 	return id;
 }
@@ -255,7 +265,10 @@ static void context_check_map(void) { }
 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
-	unsigned int i, id, cpu = smp_processor_id();
+	unsigned int id;
+#ifdef CONFIG_SMP
+	unsigned int i, cpu = smp_processor_id();
+#endif
 	unsigned long *map;
 
 	/* No lockless fast path .. yet */
@@ -329,6 +342,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 	/* If that context got marked stale on this CPU, then flush the
 	 * local TLB for it and unmark it before we use it
 	 */
+#ifdef CONFIG_SMP
 	if (test_bit(id, stale_map[cpu])) {
 		pr_hardcont(" | stale flush %d [%d..%d]",
 			    id, cpu_first_thread_sibling(cpu),
@@ -343,6 +357,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 				__clear_bit(id, stale_map[i]);
 		}
 	}
+#endif
 
 	/* Flick the MMU and release lock */
 	pr_hardcont(" -> %d\n", id);
@@ -448,9 +463,7 @@ void __init mmu_context_init(void)
 	 */
 	context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0);
 	context_mm = memblock_virt_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0);
-#ifndef CONFIG_SMP
-	stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
-#else
+#ifdef CONFIG_SMP
 	stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
 
 	cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,

From 55a0edf083022e402042255a0afb03d0b3a63a9b Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 10 Apr 2018 08:34:35 +0200
Subject: [PATCH 227/251] powerpc/64: optimises from64to32()

The current implementation of from64to32() gives a poor result:

0000000000000270 <.from64to32>:
 270:	38 00 ff ff 	li      r0,-1
 274:	78 69 00 22 	rldicl  r9,r3,32,32
 278:	78 00 00 20 	clrldi  r0,r0,32
 27c:	7c 60 00 38 	and     r0,r3,r0
 280:	7c 09 02 14 	add     r0,r9,r0
 284:	78 09 00 22 	rldicl  r9,r0,32,32
 288:	7c 00 4a 14 	add     r0,r0,r9
 28c:	78 03 00 20 	clrldi  r3,r0,32
 290:	4e 80 00 20 	blr

This patch modifies from64to32() to operate in the same
spirit as csum_fold()

It swaps the two 32-bit halves of sum then it adds it with the
unswapped sum. If there is a carry from adding the two 32-bit halves,
it will carry from the lower half into the upper half, giving us the
correct sum in the upper half.

The resulting code is:

0000000000000260 <.from64to32>:
 260:	78 60 00 02 	rotldi  r0,r3,32
 264:	7c 60 1a 14 	add     r3,r0,r3
 268:	78 63 00 22 	rldicl  r3,r3,32,32
 26c:	4e 80 00 20 	blr

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/checksum.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 4e63787dc3bec..54065caa40b31 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -12,6 +12,7 @@
 #ifdef CONFIG_GENERIC_CSUM
 #include <asm-generic/checksum.h>
 #else
+#include <linux/bitops.h>
 /*
  * Computes the checksum of a memory block at src, length len,
  * and adds in "sum" (32-bit), while copying the block to dst.
@@ -55,11 +56,7 @@ static inline __sum16 csum_fold(__wsum sum)
 
 static inline u32 from64to32(u64 x)
 {
-	/* add up 32-bit and 32-bit for 32+c bit */
-	x = (x & 0xffffffff) + (x >> 32);
-	/* add up carry.. */
-	x = (x & 0xffffffff) + (x >> 32);
-	return (u32)x;
+	return (x + ror64(x, 32)) >> 32;
 }
 
 static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,

From 0cc377d16e565b90b43b7550cdf5b3abd7942a75 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 17 Apr 2018 13:23:10 +0200
Subject: [PATCH 228/251] powerpc/misc: merge reloc_offset() and
 add_reloc_offset()

reloc_offset() is the same as add_reloc_offset(0)

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/misc.S | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 2f18fc1368d0a..0b196cdcd15d4 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -25,23 +25,12 @@
 /*
  * Returns (address we are running at) - (address we were linked at)
  * for use before the text and data are mapped to KERNELBASE.
- */
-
-_GLOBAL(reloc_offset)
-	mflr	r0
-	bl	1f
-1:	mflr	r3
-	PPC_LL	r4,(2f-1b)(r3)
-	subf	r3,r4,r3
-	mtlr	r0
-	blr
 
-	.align	3
-2:	PPC_LONG 1b
-
-/*
  * add_reloc_offset(x) returns x + reloc_offset().
  */
+
+_GLOBAL(reloc_offset)
+	li	r3, 0
 _GLOBAL(add_reloc_offset)
 	mflr	r0
 	bl	1f

From 169f438a7e369226a452e0ac4a54dbdf85e31943 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 17 Apr 2018 14:36:45 +0200
Subject: [PATCH 229/251] powerpc/boot: remove unused variable in mpc8xx

Variable div is set but never used. Remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/mpc8xx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/mpc8xx.c b/arch/powerpc/boot/mpc8xx.c
index add55a7f184fa..c9bd9285c5489 100644
--- a/arch/powerpc/boot/mpc8xx.c
+++ b/arch/powerpc/boot/mpc8xx.c
@@ -24,7 +24,7 @@ u32 mpc885_get_clock(u32 crystal)
 {
 	u32 *immr;
 	u32 plprcr;
-	int mfi, mfn, mfd, pdf, div;
+	int mfi, mfn, mfd, pdf;
 	u32 ret;
 
 	immr = fsl_get_immr();
@@ -43,7 +43,6 @@ u32 mpc885_get_clock(u32 crystal)
 	}
 
 	pdf = (plprcr >> 1) & 0xf;
-	div = (plprcr >> 20) & 3;
 	mfd = (plprcr >> 22) & 0x1f;
 	mfn = (plprcr >> 27) & 0x1f;
 

From d04f11d2713e736a3740361f8b5bb42c7147c5a2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 17 Apr 2018 14:47:35 +0200
Subject: [PATCH 230/251] powerpc/8xx: Remove RTC clock on 88x

The 885 familly processors don't have the Real Time Clock

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/8xx/adder875.c        | 2 --
 arch/powerpc/platforms/8xx/ep88xc.c          | 2 --
 arch/powerpc/platforms/8xx/mpc885ads_setup.c | 2 --
 3 files changed, 6 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
index 333dece793940..bcef9f66191e7 100644
--- a/arch/powerpc/platforms/8xx/adder875.c
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -111,7 +111,5 @@ define_machine(adder875) {
 	.get_irq = mpc8xx_get_irq,
 	.restart = mpc8xx_restart,
 	.calibrate_decr = generic_calibrate_decr,
-	.set_rtc_time = mpc8xx_set_rtc_time,
-	.get_rtc_time = mpc8xx_get_rtc_time,
 	.progress = udbg_progress,
 };
diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c
index cd0d90f1fb1cf..ebcf34a147894 100644
--- a/arch/powerpc/platforms/8xx/ep88xc.c
+++ b/arch/powerpc/platforms/8xx/ep88xc.c
@@ -170,7 +170,5 @@ define_machine(ep88xc) {
 	.get_irq	= mpc8xx_get_irq,
 	.restart = mpc8xx_restart,
 	.calibrate_decr = mpc8xx_calibrate_decr,
-	.set_rtc_time = mpc8xx_set_rtc_time,
-	.get_rtc_time = mpc8xx_get_rtc_time,
 	.progress = udbg_progress,
 };
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index e821a42d5816b..a0c83c1905c67 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -220,7 +220,5 @@ define_machine(mpc885_ads) {
 	.get_irq		= mpc8xx_get_irq,
 	.restart		= mpc8xx_restart,
 	.calibrate_decr		= mpc8xx_calibrate_decr,
-	.set_rtc_time		= mpc8xx_set_rtc_time,
-	.get_rtc_time		= mpc8xx_get_rtc_time,
 	.progress		= udbg_progress,
 };

From 56b04d568f880a48d892e840cfaf4efc0f0ce39b Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 24 Apr 2018 18:04:25 +0200
Subject: [PATCH 231/251] powerpc/signal32: Use fault_in_pages_readable() to
 prefault user context

Use fault_in_pages_readable() to prefault user context
instead of open coding

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/signal_32.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 342ac78f620f5..5eedbb282d42f 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -25,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/elf.h>
 #include <linux/ptrace.h>
+#include <linux/pagemap.h>
 #include <linux/ratelimit.h>
 #include <linux/syscalls.h>
 #ifdef CONFIG_PPC64
@@ -1049,7 +1050,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 #endif
 {
 	struct pt_regs *regs = current_pt_regs();
-	unsigned char tmp __maybe_unused;
 	int ctx_has_vsx_region = 0;
 
 #ifdef CONFIG_PPC64
@@ -1113,9 +1113,8 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	}
 	if (new_ctx == NULL)
 		return 0;
-	if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
-	    || __get_user(tmp, (u8 __user *) new_ctx)
-	    || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+	if (!access_ok(VERIFY_READ, new_ctx, ctx_size) ||
+	    fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
 		return -EFAULT;
 
 	/*
@@ -1242,7 +1241,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
 	struct pt_regs *regs = current_pt_regs();
 	struct sig_dbg_op op;
 	int i;
-	unsigned char tmp __maybe_unused;
 	unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	unsigned long new_dbcr0 = current->thread.debug.dbcr0;
@@ -1298,9 +1296,8 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
 	current->thread.debug.dbcr0 = new_dbcr0;
 #endif
 
-	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
-	    || __get_user(tmp, (u8 __user *) ctx)
-	    || __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
+	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) ||
+	    fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
 		return -EFAULT;
 
 	/*

From 1128bb7813a896bd608fb622eee3c26aaf33b473 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 18 May 2018 15:01:16 +0200
Subject: [PATCH 232/251] powerpc/lib: Adjust .balign inside string functions
 for PPC32

commit 87a156fb18fe1 ("Align hot loops of some string functions")
degraded the performance of string functions by adding useless
nops

A simple benchmark on an 8xx calling 100000x a memchr() that
matches the first byte runs in 41668 TB ticks before this patch
and in 35986 TB ticks after this patch. So this gives an
improvement of approx 10%

Another benchmark doing the same with a memchr() matching the 128th
byte runs in 1011365 TB ticks before this patch and 1005682 TB ticks
after this patch, so regardless on the number of loops, removing
those useless nops improves the test by 5683 TB ticks.

Fixes: 87a156fb18fe1 ("Align hot loops of some string functions")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cache.h | 3 +++
 arch/powerpc/lib/string.S        | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index c1d257aa4c2d3..66298461b640f 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -9,11 +9,14 @@
 #if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX)
 #define L1_CACHE_SHIFT		4
 #define MAX_COPY_PREFETCH	1
+#define IFETCH_ALIGN_SHIFT	2
 #elif defined(CONFIG_PPC_E500MC)
 #define L1_CACHE_SHIFT		6
 #define MAX_COPY_PREFETCH	4
+#define IFETCH_ALIGN_SHIFT	3
 #elif defined(CONFIG_PPC32)
 #define MAX_COPY_PREFETCH	4
+#define IFETCH_ALIGN_SHIFT	3	/* 603 fetches 2 insn at a time */
 #if defined(CONFIG_PPC_47x)
 #define L1_CACHE_SHIFT		7
 #else
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index a787776822d86..0378def28d411 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -12,6 +12,7 @@
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
+#include <asm/cache.h>
 
 	.text
 	
@@ -23,7 +24,7 @@ _GLOBAL(strncpy)
 	mtctr	r5
 	addi	r6,r3,-1
 	addi	r4,r4,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r0,1(r4)
 	cmpwi	0,r0,0
 	stbu	r0,1(r6)
@@ -43,7 +44,7 @@ _GLOBAL(strncmp)
 	mtctr	r5
 	addi	r5,r3,-1
 	addi	r4,r4,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r3,1(r5)
 	cmpwi	1,r3,0
 	lbzu	r0,1(r4)
@@ -77,7 +78,7 @@ _GLOBAL(memchr)
 	beq-	2f
 	mtctr	r5
 	addi	r3,r3,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r0,1(r3)
 	cmpw	0,r0,r4
 	bdnzf	2,1b

From 373e098e1e788d7b89ec0f31765a6c08e2ea0f7c Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 24 May 2018 11:22:27 +0000
Subject: [PATCH 233/251] powerpc/32: Optimise __csum_partial()

Improve __csum_partial by interleaving loads and adds.

On a 8xx, it brings neither improvement nor degradation.
On a 83xx, it brings a 25% improvement.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/checksum_32.S | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 9a671c774b225..422d66e938e37 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
 	bdnz	2b
 21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
 	beq	3f
+	lwz	r0,4(r3)
 	mtctr	r6
-22:	lwz	r0,4(r3)
 	lwz	r6,8(r3)
+	adde	r5,r5,r0
 	lwz	r7,12(r3)
+	adde	r5,r5,r6
 	lwzu	r8,16(r3)
+	adde	r5,r5,r7
+	bdz	23f
+22:	lwz	r0,4(r3)
+	adde	r5,r5,r8
+	lwz	r6,8(r3)
 	adde	r5,r5,r0
+	lwz	r7,12(r3)
 	adde	r5,r5,r6
+	lwzu	r8,16(r3)
 	adde	r5,r5,r7
-	adde	r5,r5,r8
 	bdnz	22b
+23:	adde	r5,r5,r8
 3:	andi.	r0,r4,2
 	beq+	4f
 	lhz	r0,4(r3)

From e9c4943a107b56696e4872cdffdba6b0c7277c77 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 24 May 2018 11:33:18 +0000
Subject: [PATCH 234/251] powerpc: Implement csum_ipv6_magic in assembly

The generic csum_ipv6_magic() generates a pretty bad result

00000000 <csum_ipv6_magic>: (PPC32)
   0:	81 23 00 00 	lwz     r9,0(r3)
   4:	81 03 00 04 	lwz     r8,4(r3)
   8:	7c e7 4a 14 	add     r7,r7,r9
   c:	7d 29 38 10 	subfc   r9,r9,r7
  10:	7d 4a 51 10 	subfe   r10,r10,r10
  14:	7d 27 42 14 	add     r9,r7,r8
  18:	7d 2a 48 50 	subf    r9,r10,r9
  1c:	80 e3 00 08 	lwz     r7,8(r3)
  20:	7d 08 48 10 	subfc   r8,r8,r9
  24:	7d 4a 51 10 	subfe   r10,r10,r10
  28:	7d 29 3a 14 	add     r9,r9,r7
  2c:	81 03 00 0c 	lwz     r8,12(r3)
  30:	7d 2a 48 50 	subf    r9,r10,r9
  34:	7c e7 48 10 	subfc   r7,r7,r9
  38:	7d 4a 51 10 	subfe   r10,r10,r10
  3c:	7d 29 42 14 	add     r9,r9,r8
  40:	7d 2a 48 50 	subf    r9,r10,r9
  44:	80 e4 00 00 	lwz     r7,0(r4)
  48:	7d 08 48 10 	subfc   r8,r8,r9
  4c:	7d 4a 51 10 	subfe   r10,r10,r10
  50:	7d 29 3a 14 	add     r9,r9,r7
  54:	7d 2a 48 50 	subf    r9,r10,r9
  58:	81 04 00 04 	lwz     r8,4(r4)
  5c:	7c e7 48 10 	subfc   r7,r7,r9
  60:	7d 4a 51 10 	subfe   r10,r10,r10
  64:	7d 29 42 14 	add     r9,r9,r8
  68:	7d 2a 48 50 	subf    r9,r10,r9
  6c:	80 e4 00 08 	lwz     r7,8(r4)
  70:	7d 08 48 10 	subfc   r8,r8,r9
  74:	7d 4a 51 10 	subfe   r10,r10,r10
  78:	7d 29 3a 14 	add     r9,r9,r7
  7c:	7d 2a 48 50 	subf    r9,r10,r9
  80:	81 04 00 0c 	lwz     r8,12(r4)
  84:	7c e7 48 10 	subfc   r7,r7,r9
  88:	7d 4a 51 10 	subfe   r10,r10,r10
  8c:	7d 29 42 14 	add     r9,r9,r8
  90:	7d 2a 48 50 	subf    r9,r10,r9
  94:	7d 08 48 10 	subfc   r8,r8,r9
  98:	7d 4a 51 10 	subfe   r10,r10,r10
  9c:	7d 29 2a 14 	add     r9,r9,r5
  a0:	7d 2a 48 50 	subf    r9,r10,r9
  a4:	7c a5 48 10 	subfc   r5,r5,r9
  a8:	7c 63 19 10 	subfe   r3,r3,r3
  ac:	7d 29 32 14 	add     r9,r9,r6
  b0:	7d 23 48 50 	subf    r9,r3,r9
  b4:	7c c6 48 10 	subfc   r6,r6,r9
  b8:	7c 63 19 10 	subfe   r3,r3,r3
  bc:	7c 63 48 50 	subf    r3,r3,r9
  c0:	54 6a 80 3e 	rotlwi  r10,r3,16
  c4:	7c 63 52 14 	add     r3,r3,r10
  c8:	7c 63 18 f8 	not     r3,r3
  cc:	54 63 84 3e 	rlwinm  r3,r3,16,16,31
  d0:	4e 80 00 20 	blr

0000000000000000 <.csum_ipv6_magic>: (PPC64)
   0:	81 23 00 00 	lwz     r9,0(r3)
   4:	80 03 00 04 	lwz     r0,4(r3)
   8:	81 63 00 08 	lwz     r11,8(r3)
   c:	7c e7 4a 14 	add     r7,r7,r9
  10:	7f 89 38 40 	cmplw   cr7,r9,r7
  14:	7d 47 02 14 	add     r10,r7,r0
  18:	7d 30 10 26 	mfocrf  r9,1
  1c:	55 29 f7 fe 	rlwinm  r9,r9,30,31,31
  20:	7d 4a 4a 14 	add     r10,r10,r9
  24:	7f 80 50 40 	cmplw   cr7,r0,r10
  28:	7d 2a 5a 14 	add     r9,r10,r11
  2c:	80 03 00 0c 	lwz     r0,12(r3)
  30:	81 44 00 00 	lwz     r10,0(r4)
  34:	7d 10 10 26 	mfocrf  r8,1
  38:	55 08 f7 fe 	rlwinm  r8,r8,30,31,31
  3c:	7d 29 42 14 	add     r9,r9,r8
  40:	81 04 00 04 	lwz     r8,4(r4)
  44:	7f 8b 48 40 	cmplw   cr7,r11,r9
  48:	7d 29 02 14 	add     r9,r9,r0
  4c:	7d 70 10 26 	mfocrf  r11,1
  50:	55 6b f7 fe 	rlwinm  r11,r11,30,31,31
  54:	7d 29 5a 14 	add     r9,r9,r11
  58:	7f 80 48 40 	cmplw   cr7,r0,r9
  5c:	7d 29 52 14 	add     r9,r9,r10
  60:	7c 10 10 26 	mfocrf  r0,1
  64:	54 00 f7 fe 	rlwinm  r0,r0,30,31,31
  68:	7d 69 02 14 	add     r11,r9,r0
  6c:	7f 8a 58 40 	cmplw   cr7,r10,r11
  70:	7c 0b 42 14 	add     r0,r11,r8
  74:	81 44 00 08 	lwz     r10,8(r4)
  78:	7c f0 10 26 	mfocrf  r7,1
  7c:	54 e7 f7 fe 	rlwinm  r7,r7,30,31,31
  80:	7c 00 3a 14 	add     r0,r0,r7
  84:	7f 88 00 40 	cmplw   cr7,r8,r0
  88:	7d 20 52 14 	add     r9,r0,r10
  8c:	80 04 00 0c 	lwz     r0,12(r4)
  90:	7d 70 10 26 	mfocrf  r11,1
  94:	55 6b f7 fe 	rlwinm  r11,r11,30,31,31
  98:	7d 29 5a 14 	add     r9,r9,r11
  9c:	7f 8a 48 40 	cmplw   cr7,r10,r9
  a0:	7d 29 02 14 	add     r9,r9,r0
  a4:	7d 70 10 26 	mfocrf  r11,1
  a8:	55 6b f7 fe 	rlwinm  r11,r11,30,31,31
  ac:	7d 29 5a 14 	add     r9,r9,r11
  b0:	7f 80 48 40 	cmplw   cr7,r0,r9
  b4:	7d 29 2a 14 	add     r9,r9,r5
  b8:	7c 10 10 26 	mfocrf  r0,1
  bc:	54 00 f7 fe 	rlwinm  r0,r0,30,31,31
  c0:	7d 29 02 14 	add     r9,r9,r0
  c4:	7f 85 48 40 	cmplw   cr7,r5,r9
  c8:	7c 09 32 14 	add     r0,r9,r6
  cc:	7d 50 10 26 	mfocrf  r10,1
  d0:	55 4a f7 fe 	rlwinm  r10,r10,30,31,31
  d4:	7c 00 52 14 	add     r0,r0,r10
  d8:	7f 80 30 40 	cmplw   cr7,r0,r6
  dc:	7d 30 10 26 	mfocrf  r9,1
  e0:	55 29 ef fe 	rlwinm  r9,r9,29,31,31
  e4:	7c 09 02 14 	add     r0,r9,r0
  e8:	54 03 80 3e 	rotlwi  r3,r0,16
  ec:	7c 03 02 14 	add     r0,r3,r0
  f0:	7c 03 00 f8 	not     r3,r0
  f4:	78 63 84 22 	rldicl  r3,r3,48,48
  f8:	4e 80 00 20 	blr

This patch implements it in assembly for both PPC32 and PPC64

Link: https://github.com/linuxppc/linux/issues/9
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/checksum.h |  6 ++++++
 arch/powerpc/lib/checksum_32.S      | 33 +++++++++++++++++++++++++++++
 arch/powerpc/lib/checksum_64.S      | 28 ++++++++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 54065caa40b31..a78a57e5058d9 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -13,6 +13,7 @@
 #include <asm-generic/checksum.h>
 #else
 #include <linux/bitops.h>
+#include <linux/in6.h>
 /*
  * Computes the checksum of a memory block at src, length len,
  * and adds in "sum" (32-bit), while copying the block to dst.
@@ -211,6 +212,11 @@ static inline __sum16 ip_compute_csum(const void *buff, int len)
 	return csum_fold(csum_partial(buff, len, 0));
 }
 
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum sum);
+
 #endif
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 422d66e938e37..aa224069f93a7 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -302,3 +302,36 @@ dst_error:
 	EX_TABLE(51b, dst_error);
 
 EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ *			   const struct in6_addr *daddr,
+ *			   __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+	lwz	r8, 0(r3)
+	lwz	r9, 4(r3)
+	addc	r0, r7, r8
+	lwz	r10, 8(r3)
+	adde	r0, r0, r9
+	lwz	r11, 12(r3)
+	adde	r0, r0, r10
+	lwz	r8, 0(r4)
+	adde	r0, r0, r11
+	lwz	r9, 4(r4)
+	adde	r0, r0, r8
+	lwz	r10, 8(r4)
+	adde	r0, r0, r9
+	lwz	r11, 12(r4)
+	adde	r0, r0, r10
+	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
+	adde	r0, r0, r11
+	adde	r0, r0, r5
+	addze	r0, r0
+	rotlwi	r3, r0, 16
+	add	r3, r0, r3
+	not	r3, r3
+	rlwinm	r3, r3, 16, 16, 31
+	blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index d7f1a966136ea..886ed94b9c133 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -429,3 +429,31 @@ dstnr;	stb	r6,0(r4)
 	stw	r6,0(r8)
 	blr
 EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ *			   const struct in6_addr *daddr,
+ *			   __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+	ld	r8, 0(r3)
+	ld	r9, 8(r3)
+	add	r5, r5, r6
+	addc	r0, r8, r9
+	ld	r10, 0(r4)
+	ld	r11, 8(r4)
+	adde	r0, r0, r10
+	add	r5, r5, r7
+	adde	r0, r0, r11
+	adde	r0, r0, r5
+	addze	r0, r0
+	rotldi  r3, r0, 32		/* fold two 32 bit halves together */
+	add	r3, r0, r3
+	srdi	r0, r3, 32
+	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
+	add	r3, r0, r3
+	not	r3, r3
+	rlwinm	r3, r3, 16, 16, 31
+	blr
+EXPORT_SYMBOL(csum_ipv6_magic)

From 1c38976334c0efce1b285369a6037f205e196299 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Mon, 28 May 2018 06:08:34 +0000
Subject: [PATCH 235/251] powerpc/Makefile: set -mcpu=860 flag for the 8xx

When compiled with GCC 8.1, vmlinux is significantly bigger than
with GCC 4.8.

When looking at the generated code with objdump, we notice that
all functions and loops when a 16 bytes alignment. This significantly
increases the size of the kernel. It is pointless and even
counterproductive as on the 8xx 'nop' also consumes one clock cycle.

Size of vmlinux with GCC 4.8:
   text	   data	    bss	    dec	    hex	filename
5801948	1626076	 457796	7885820	 7853fc	vmlinux

Size of vmlinux with GCC 8.1:
   text	   data	    bss	    dec	    hex	filename
6764592	1630652	 456476	8851720	 871108	vmlinux

Size of vmlinux with GCC 8.1 and this patch:
   text	   data	    bss	    dec	    hex	filename
6331544	1631756	 456476	8419776	 8079c0	vmlinux

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 6faf1d6ad9dd2..9b52e42e581b2 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -181,6 +181,7 @@ CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
 CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
 CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8)
 CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9)
+CFLAGS-$(CONFIG_PPC_8xx) += $(call cc-option,-mcpu=860)
 
 # Altivec option not allowed with e500mc64 in GCC.
 ifeq ($(CONFIG_ALTIVEC),y)

From 60f1d2893ee6de65cdea609c84950b133e76a769 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 29 May 2018 16:19:14 +0000
Subject: [PATCH 236/251] powerpc/time: inline arch_vtime_task_switch()

arch_vtime_task_switch() is a small function which is called
only from vtime_common_task_switch(), so it is worth inlining

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cputime.h | 16 +++++++++++++++-
 arch/powerpc/kernel/time.c         | 21 ---------------------
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 99b541865d8dd..bc4903badb3ff 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -47,9 +47,23 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct)
  * has to be populated in the new task
  */
 #ifdef CONFIG_PPC64
+#define get_accounting(tsk)	(&get_paca()->accounting)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 #else
-void arch_vtime_task_switch(struct task_struct *tsk);
+#define get_accounting(tsk)	(&task_thread_info(tsk)->accounting)
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+static inline void arch_vtime_task_switch(struct task_struct *prev)
+{
+	struct cpu_accounting_data *acct = get_accounting(current);
+	struct cpu_accounting_data *acct0 = get_accounting(prev);
+
+	acct->starttime = acct0->starttime;
+	acct->startspurr = acct0->startspurr;
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2530cf60b8392..70f145e024877 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -163,12 +163,6 @@ EXPORT_SYMBOL(__cputime_usec_factor);
 void (*dtl_consumer)(struct dtl_entry *, u64);
 #endif
 
-#ifdef CONFIG_PPC64
-#define get_accounting(tsk)	(&get_paca()->accounting)
-#else
-#define get_accounting(tsk)	(&task_thread_info(tsk)->accounting)
-#endif
-
 static void calc_cputime_factors(void)
 {
 	struct div_result res;
@@ -421,21 +415,6 @@ void vtime_flush(struct task_struct *tsk)
 	acct->softirq_time = 0;
 }
 
-#ifdef CONFIG_PPC32
-/*
- * Called from the context switch with interrupts disabled, to charge all
- * accumulated times to the current process, and to prepare accounting on
- * the next process.
- */
-void arch_vtime_task_switch(struct task_struct *prev)
-{
-	struct cpu_accounting_data *acct = get_accounting(current);
-
-	acct->starttime = get_accounting(prev)->starttime;
-	acct->startspurr = get_accounting(prev)->startspurr;
-}
-#endif /* CONFIG_PPC32 */
-
 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif

From f36bbf21e8b911b3c629fd36d4d217105b47a20e Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 30 May 2018 07:06:13 +0000
Subject: [PATCH 237/251] powerpc/lib: optimise 32 bits __clear_user()

Rewrite clear_user() on the same principle as memset(0), making use
of dcbz to clear complete cache lines.

This code is a copy/paste of memset(), with some modifications
in order to retrieve remaining number of bytes to be cleared,
as it needs to be returned in case of error.

On the same way as done on PPC64 in commit 17968fbbd19f1
("powerpc: 64bit optimised __clear_user"), the patch moves
__clear_user() into a dedicated file string_32.S

On a MPC885, throughput is almost doubled:

Before:
~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 18.990779 seconds, 52.7MB/s

After:
~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 9.611468 seconds, 104.0MB/s

On a MPC8321, throughput is multiplied by 2.12:

Before:
root@vgoippro:~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 6.844352 seconds, 146.1MB/s

After:
root@vgoippro:~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 3.218854 seconds, 310.7MB/s

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/Makefile    |  5 +-
 arch/powerpc/lib/string.S    | 46 ------------------
 arch/powerpc/lib/string_32.S | 90 ++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 48 deletions(-)
 create mode 100644 arch/powerpc/lib/string_32.S

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 653901042ad7d..2c9b8c0adf229 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -26,13 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 			       memcpy_power7.o
 
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-	   string_64.o memcpy_64.o memcmp_64.o pmem.o
+	   memcpy_64.o memcmp_64.o pmem.o
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
 
-obj-y			+= checksum_$(BITS).o checksum_wrappers.o
+obj-y			+= checksum_$(BITS).o checksum_wrappers.o \
+			   string_$(BITS).o
 
 obj-y			+= sstep.o ldstfp.o quad.o
 obj64-y			+= quad.o
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 0378def28d411..5343a88e619e9 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -8,8 +8,6 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-#include <asm/processor.h>
-#include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
 #include <asm/cache.h>
@@ -86,47 +84,3 @@ _GLOBAL(memchr)
 2:	li	r3,0
 	blr
 EXPORT_SYMBOL(memchr)
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__clear_user)
-	addi	r6,r3,-4
-	li	r3,0
-	li	r5,0
-	cmplwi	0,r4,4
-	blt	7f
-	/* clear a single word */
-11:	stwu	r5,4(r6)
-	beqlr
-	/* clear word sized chunks */
-	andi.	r0,r6,3
-	add	r4,r0,r4
-	subf	r6,r0,r6
-	srwi	r0,r4,2
-	andi.	r4,r4,3
-	mtctr	r0
-	bdz	7f
-1:	stwu	r5,4(r6)
-	bdnz	1b
-	/* clear byte sized chunks */
-7:	cmpwi	0,r4,0
-	beqlr
-	mtctr	r4
-	addi	r6,r6,3
-8:	stbu	r5,1(r6)
-	bdnz	8b
-	blr
-90:	mr	r3,r4
-	blr
-91:	mfctr	r3
-	slwi	r3,r3,2
-	add	r3,r3,r4
-	blr
-92:	mfctr	r3
-	blr
-
-	EX_TABLE(11b, 90b)
-	EX_TABLE(1b, 91b)
-	EX_TABLE(8b, 92b)
-
-EXPORT_SYMBOL(__clear_user)
-#endif
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
new file mode 100644
index 0000000000000..f69a6aab7bfbb
--- /dev/null
+++ b/arch/powerpc/lib/string_32.S
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * String handling functions for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+#include <asm/cache.h>
+
+	.text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(__clear_user)
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.
+ */
+	cmplwi	cr0, r4, 4
+	mr	r10, r3
+	li	r3, 0
+	blt	7f
+
+11:	stw	r3, 0(r10)
+	beqlr
+	andi.	r0, r10, 3
+	add	r11, r0, r4
+	subf	r6, r0, r10
+
+	clrlwi	r7, r6, 32 - LG_CACHELINE_BYTES
+	add	r8, r7, r11
+	srwi	r9, r8, LG_CACHELINE_BYTES
+	addic.	r9, r9, -1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0, r7, CACHELINE_MASK & ~3
+	srwi.	r0, r0, 2
+	beq	3f
+	mtctr	r0
+4:	stwu	r3, 4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7, 4
+10:	dcbz	r7, r6
+	addi	r6, r6, CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r11, r8, 32 - LG_CACHELINE_BYTES
+	addi	r11, r11, 4
+
+2:	srwi	r0 ,r11 ,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r3, 4(r6)
+	bdnz	1b
+6:	andi.	r11, r11, 3
+	beqlr
+	mtctr	r11
+	addi	r6, r6, 3
+8:	stbu	r3, 1(r6)
+	bdnz	8b
+	blr
+
+7:	cmpwi	cr0, r4, 0
+	beqlr
+	mtctr	r4
+	addi	r6, r10, -1
+9:	stbu	r3, 1(r6)
+	bdnz	9b
+	blr
+
+90:	mr	r3, r4
+	blr
+91:	add	r3, r10, r4
+	subf	r3, r6, r3
+	blr
+
+	EX_TABLE(11b, 90b)
+	EX_TABLE(4b, 91b)
+	EX_TABLE(10b, 91b)
+	EX_TABLE(1b, 91b)
+	EX_TABLE(8b, 91b)
+	EX_TABLE(9b, 91b)
+
+EXPORT_SYMBOL(__clear_user)

From 2676b89eb82f56825d7289edf21f17a6461388b7 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 30 May 2018 07:06:15 +0000
Subject: [PATCH 238/251] powerpc/lib: optimise PPC32 memcmp

At the time being, memcmp() compares two chunks of memory
byte per byte.

This patch optimises the comparison by comparing word by word.

On the same way as commit 15c2d45d17418 ("powerpc: Add 64bit
optimised memcmp"), this patch moves memcmp() into a dedicated
file named memcmp_32.S

A small benchmark performed on an 8xx comparing two chuncks
of 512 bytes performed 100000 times gives:

Before : 5852274 TB ticks
After:   1488638 TB ticks

This is almost 4 times faster

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/Makefile    |  4 ++--
 arch/powerpc/lib/memcmp_32.S | 45 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/lib/string.S    | 17 --------------
 3 files changed, 47 insertions(+), 19 deletions(-)
 create mode 100644 arch/powerpc/lib/memcmp_32.S

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 2c9b8c0adf229..d0ca13ad82310 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -26,14 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 			       memcpy_power7.o
 
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-	   memcpy_64.o memcmp_64.o pmem.o
+	   memcpy_64.o pmem.o
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
 
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o \
-			   string_$(BITS).o
+			   string_$(BITS).o memcmp_$(BITS).o
 
 obj-y			+= sstep.o ldstfp.o quad.o
 obj64-y			+= quad.o
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
new file mode 100644
index 0000000000000..5010e376f7b83
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * memcmp for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+	.text
+
+_GLOBAL(memcmp)
+	srawi.	r7, r5, 2		/* Divide len by 4 */
+	mr	r6, r3
+	beq-	3f
+	mtctr	r7
+	li	r7, 0
+1:	lwzx	r3, r6, r7
+	lwzx	r0, r4, r7
+	addi	r7, r7, 4
+	cmplw	cr0, r3, r0
+	bdnzt	eq, 1b
+	bne	5f
+3:	andi.	r3, r5, 3
+	beqlr
+	cmplwi	cr1, r3, 2
+	blt-	cr1, 4f
+	lhzx	r3, r6, r7
+	lhzx	r0, r4, r7
+	addi	r7, r7, 2
+	subf.	r3, r0, r3
+	beqlr	cr1
+	bnelr
+4:	lbzx	r3, r6, r7
+	lbzx	r0, r4, r7
+	subf.	r3, r0, r3
+	blr
+5:	li	r3, 1
+	bgtlr
+	li	r3, -1
+	blr
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 5343a88e619e9..4b41970e9ed84 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -54,23 +54,6 @@ _GLOBAL(strncmp)
 	blr
 EXPORT_SYMBOL(strncmp)
 
-#ifdef CONFIG_PPC32
-_GLOBAL(memcmp)
-	PPC_LCMPI 0,r5,0
-	beq-	2f
-	mtctr	r5
-	addi	r6,r3,-1
-	addi	r4,r4,-1
-1:	lbzu	r3,1(r6)
-	lbzu	r0,1(r4)
-	subf.	r3,r0,r3
-	bdnzt	2,1b
-	blr
-2:	li	r3,0
-	blr
-EXPORT_SYMBOL(memcmp)
-#endif
-
 _GLOBAL(memchr)
 	PPC_LCMPI 0,r5,0
 	beq-	2f

From 92c8c16f345759e87c5d5b771d438ff18ec7593f Mon Sep 17 00:00:00 2001
From: Mark Greer <mgreer@animalcreek.com>
Date: Thu, 5 Apr 2018 18:17:16 -0700
Subject: [PATCH 239/251] powerpc/embedded6xx: Remove C2K board support

The C2K platform appears to be orphaned so remove code supporting it.

CC: Remi Machet <rmachet@nvidia.com>
Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Acked-by: Remi Machet <remi@machet.us>
Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/Makefile                  |   5 +-
 arch/powerpc/boot/cuboot-c2k.c              | 189 ----------
 arch/powerpc/boot/dts/c2k.dts               | 366 ------------------
 arch/powerpc/configs/c2k_defconfig          | 389 --------------------
 arch/powerpc/platforms/embedded6xx/Kconfig  |  10 -
 arch/powerpc/platforms/embedded6xx/Makefile |   1 -
 arch/powerpc/platforms/embedded6xx/c2k.c    | 148 --------
 7 files changed, 2 insertions(+), 1106 deletions(-)
 delete mode 100644 arch/powerpc/boot/cuboot-c2k.c
 delete mode 100644 arch/powerpc/boot/dts/c2k.dts
 delete mode 100644 arch/powerpc/configs/c2k_defconfig
 delete mode 100644 arch/powerpc/platforms/embedded6xx/c2k.c

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 49767e06202cc..b5620ef685577 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -149,8 +149,8 @@ src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c
 src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c
 src-plat-$(CONFIG_FSL_SOC_BOOKE) += cuboot-85xx.c cuboot-85xx-cpm2.c
 src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
-					cuboot-c2k.c gamecube-head.S \
-					gamecube.c wii-head.S wii.c holly.c \
+					gamecube-head.S gamecube.c \
+					wii-head.S wii.c holly.c \
 					fixed-head.S mvme5100.c
 src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
 src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
@@ -345,7 +345,6 @@ image-$(CONFIG_MVME7100)                += dtbImage.mvme7100
 # Board ports in arch/powerpc/platform/embedded6xx/Kconfig
 image-$(CONFIG_STORCENTER)		+= cuImage.storcenter
 image-$(CONFIG_MPC7448HPC2)		+= cuImage.mpc7448hpc2
-image-$(CONFIG_PPC_C2K)			+= cuImage.c2k
 image-$(CONFIG_GAMECUBE)		+= dtbImage.gamecube
 image-$(CONFIG_WII)			+= dtbImage.wii
 image-$(CONFIG_MVME5100)		+= dtbImage.mvme5100
diff --git a/arch/powerpc/boot/cuboot-c2k.c b/arch/powerpc/boot/cuboot-c2k.c
deleted file mode 100644
index 9309c51f1d65d..0000000000000
--- a/arch/powerpc/boot/cuboot-c2k.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * GEFanuc C2K platform code.
- *
- * Author: Remi Machet <rmachet@slac.stanford.edu>
- *
- * Originated from prpmc2800.c
- *
- * 2008 (c) Stanford University
- * 2007 (c) MontaVista, Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "types.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "elf.h"
-#include "mv64x60.h"
-#include "cuboot.h"
-#include "ppcboot.h"
-
-static u8 *bridge_base;
-
-static void c2k_bridge_setup(u32 mem_size)
-{
-	u32 i, v[30], enables, acc_bits;
-	u32 pci_base_hi, pci_base_lo, size, buf[2];
-	unsigned long cpu_base;
-	int rc;
-	void *devp, *mv64x60_devp;
-	u8 *bridge_pbase, is_coherent;
-	struct mv64x60_cpu2pci_win *tbl;
-	int bus;
-
-	bridge_pbase = mv64x60_get_bridge_pbase();
-	is_coherent = mv64x60_is_coherent();
-
-	if (is_coherent)
-		acc_bits = MV64x60_PCI_ACC_CNTL_SNOOP_WB
-			| MV64x60_PCI_ACC_CNTL_SWAP_NONE
-			| MV64x60_PCI_ACC_CNTL_MBURST_32_BYTES
-			| MV64x60_PCI_ACC_CNTL_RDSIZE_32_BYTES;
-	else
-		acc_bits = MV64x60_PCI_ACC_CNTL_SNOOP_NONE
-			| MV64x60_PCI_ACC_CNTL_SWAP_NONE
-			| MV64x60_PCI_ACC_CNTL_MBURST_128_BYTES
-			| MV64x60_PCI_ACC_CNTL_RDSIZE_256_BYTES;
-
-	mv64x60_config_ctlr_windows(bridge_base, bridge_pbase, is_coherent);
-	mv64x60_devp = find_node_by_compatible(NULL, "marvell,mv64360");
-	if (mv64x60_devp == NULL)
-		fatal("Error: Missing marvell,mv64360 device tree node\n\r");
-
-	enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE));
-	enables |= 0x007ffe00; /* Disable all cpu->pci windows */
-	out_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE), enables);
-
-	/* Get the cpu -> pci i/o & mem mappings from the device tree */
-	devp = NULL;
-	for (bus = 0; ; bus++) {
-		char name[] = "pci ";
-
-		name[strlen(name)-1] = bus+'0';
-
-		devp = find_node_by_alias(name);
-		if (devp == NULL)
-			break;
-
-		if (bus >= 2)
-			fatal("Error: Only 2 PCI controllers are supported at" \
-				" this time.\n");
-
-		mv64x60_config_pci_windows(bridge_base, bridge_pbase, bus, 0,
-				mem_size, acc_bits);
-
-		rc = getprop(devp, "ranges", v, sizeof(v));
-		if (rc == 0)
-			fatal("Error: Can't find marvell,mv64360-pci ranges"
-				" property\n\r");
-
-		/* Get the cpu -> pci i/o & mem mappings from the device tree */
-
-		for (i = 0; i < rc; i += 6) {
-			switch (v[i] & 0xff000000) {
-			case 0x01000000: /* PCI I/O Space */
-				tbl = mv64x60_cpu2pci_io;
-				break;
-			case 0x02000000: /* PCI MEM Space */
-				tbl = mv64x60_cpu2pci_mem;
-				break;
-			default:
-				continue;
-			}
-
-			pci_base_hi = v[i+1];
-			pci_base_lo = v[i+2];
-			cpu_base = v[i+3];
-			size = v[i+5];
-
-			buf[0] = cpu_base;
-			buf[1] = size;
-
-			if (!dt_xlate_addr(devp, buf, sizeof(buf), &cpu_base))
-				fatal("Error: Can't translate PCI address " \
-						"0x%x\n\r", (u32)cpu_base);
-
-			mv64x60_config_cpu2pci_window(bridge_base, bus,
-				pci_base_hi, pci_base_lo, cpu_base, size, tbl);
-		}
-
-		enables &= ~(3<<(9+bus*5)); /* Enable cpu->pci<bus> i/o,
-						cpu->pci<bus> mem0 */
-		out_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE),
-			enables);
-	};
-}
-
-static void c2k_fixups(void)
-{
-	u32 mem_size;
-
-	mem_size = mv64x60_get_mem_size(bridge_base);
-	c2k_bridge_setup(mem_size); /* Do necessary bridge setup */
-}
-
-#define MV64x60_MPP_CNTL_0	0xf000
-#define MV64x60_MPP_CNTL_2	0xf008
-#define MV64x60_GPP_IO_CNTL	0xf100
-#define MV64x60_GPP_LEVEL_CNTL	0xf110
-#define MV64x60_GPP_VALUE_SET	0xf118
-
-static void c2k_reset(void)
-{
-	u32 temp;
-
-	udelay(5000000);
-
-	if (bridge_base != 0) {
-		temp = in_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_0));
-		temp &= 0xFFFF0FFF;
-		out_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_0), temp);
-
-		temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL));
-		temp |= 0x00000004;
-		out_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL), temp);
-
-		temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL));
-		temp |= 0x00000004;
-		out_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL), temp);
-
-		temp = in_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_2));
-		temp &= 0xFFFF0FFF;
-		out_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_2), temp);
-
-		temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL));
-		temp |= 0x00080000;
-		out_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL), temp);
-
-		temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL));
-		temp |= 0x00080000;
-		out_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL), temp);
-
-		out_le32((u32 *)(bridge_base + MV64x60_GPP_VALUE_SET),
-				0x00080004);
-	}
-
-	for (;;);
-}
-
-static bd_t bd;
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
-			unsigned long r6, unsigned long r7)
-{
-	CUBOOT_INIT();
-
-	fdt_init(_dtb_start);
-
-	bridge_base = mv64x60_get_bridge_base();
-
-	platform_ops.fixups = c2k_fixups;
-	platform_ops.exit = c2k_reset;
-
-	if (serial_console_init() < 0)
-		exit();
-}
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
deleted file mode 100644
index c5beb72d18b7a..0000000000000
--- a/arch/powerpc/boot/dts/c2k.dts
+++ /dev/null
@@ -1,366 +0,0 @@
-/* Device Tree Source for GEFanuc C2K
- *
- * Author: Remi Machet <rmachet@slac.stanford.edu>
- *
- * Originated from prpmc2800.dts
- *
- * 2008 (c) Stanford University
- * 2007 (c) MontaVista, Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-/dts-v1/;
-
-/ {
-	#address-cells = <1>;
-	#size-cells = <1>;
-	model = "C2K";
-	compatible = "GEFanuc,C2K";
-	coherency-off;
-
-	aliases {
-		pci0 = &PCI0;
-		pci1 = &PCI1;
-	};
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		cpu@0 {
-			device_type = "cpu";
-			compatible = "PowerPC,7447";
-			reg = <0>;
-			clock-frequency = <996000000>;	/* 996 MHz */
-			bus-frequency = <166666667>;	/* 166.6666 MHz */
-			timebase-frequency = <41666667>;	/* 166.6666/4 MHz */
-			i-cache-line-size = <32>;
-			d-cache-line-size = <32>;
-			i-cache-size = <32768>;
-			d-cache-size = <32768>;
-		};
-	};
-
-	memory {
-		device_type = "memory";
-		reg = <0x00000000 0x40000000>;	/* 1GB */
-	};
-
-	system-controller@d8000000 { /* Marvell Discovery */
-		#address-cells = <1>;
-		#size-cells = <1>;
-		model = "mv64460";
-		compatible = "marvell,mv64360";
-		clock-frequency = <166666667>;		/* 166.66... MHz */
-		reg = <0xd8000000 0x00010000>;
-		virtual-reg = <0xd8000000>;
-		ranges = <0xd4000000 0xd4000000 0x01000000	/* PCI 0 I/O Space */
-			  0x80000000 0x80000000 0x08000000	/* PCI 0 MEM Space */
-			  0xd0000000 0xd0000000 0x01000000	/* PCI 1 I/O Space */
-			  0xa0000000 0xa0000000 0x08000000	/* PCI 1 MEM Space */
-			  0xd8100000 0xd8100000 0x00010000	/* FPGA */
-			  0xd8110000 0xd8110000 0x00010000	/* FPGA USARTs */
-			  0xf8000000 0xf8000000 0x08000000	/* User FLASH */
-			  0x00000000 0xd8000000 0x00010000	/* Bridge's regs */
-			  0xd8140000 0xd8140000 0x00040000>;	/* Integrated SRAM */
-
-		mdio@2000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "marvell,mv64360-mdio";
-			reg = <0x2000 4>;
-			PHY0: ethernet-phy@0 {
-				interrupts = <76>;	/* GPP 12 */
-				interrupt-parent = <&PIC>;
-				reg = <0>;
-			};
-			PHY1: ethernet-phy@1 {
-				interrupts = <76>;	/* GPP 12 */
-				interrupt-parent = <&PIC>;
-				reg = <1>;
-			};
-			PHY2: ethernet-phy@2 {
-				interrupts = <76>;	/* GPP 12 */
-				interrupt-parent = <&PIC>;
-				reg = <2>;
-			};
-		};
-
-		ethernet-group@2000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "marvell,mv64360-eth-group";
-			reg = <0x2000 0x2000>;
-			ethernet@0 {
-				device_type = "network";
-				compatible = "marvell,mv64360-eth";
-				reg = <0>;
-				interrupts = <32>;
-				interrupt-parent = <&PIC>;
-				phy = <&PHY0>;
-				local-mac-address = [ 00 00 00 00 00 00 ];
-			};
-			ethernet@1 {
-				device_type = "network";
-				compatible = "marvell,mv64360-eth";
-				reg = <1>;
-				interrupts = <33>;
-				interrupt-parent = <&PIC>;
-				phy = <&PHY1>;
-				local-mac-address = [ 00 00 00 00 00 00 ];
-			};
-			ethernet@2 {
-				device_type = "network";
-				compatible = "marvell,mv64360-eth";
-				reg = <2>;
-				interrupts = <34>;
-				interrupt-parent = <&PIC>;
-				phy = <&PHY2>;
-				local-mac-address = [ 00 00 00 00 00 00 ];
-			};
-		};
-
-		SDMA0: sdma@4000 {
-			compatible = "marvell,mv64360-sdma";
-			reg = <0x4000 0xc18>;
-			virtual-reg = <0xd8004000>;
-			interrupt-base = <0>;
-			interrupts = <36>;
-			interrupt-parent = <&PIC>;
-		};
-
-		SDMA1: sdma@6000 {
-			compatible = "marvell,mv64360-sdma";
-			reg = <0x6000 0xc18>;
-			virtual-reg = <0xd8006000>;
-			interrupt-base = <0>;
-			interrupts = <38>;
-			interrupt-parent = <&PIC>;
-		};
-
-		BRG0: brg@b200 {
-			compatible = "marvell,mv64360-brg";
-			reg = <0xb200 0x8>;
-			clock-src = <8>;
-			clock-frequency = <133333333>;
-			current-speed = <115200>;
-		};
-
-		BRG1: brg@b208 {
-			compatible = "marvell,mv64360-brg";
-			reg = <0xb208 0x8>;
-			clock-src = <8>;
-			clock-frequency = <133333333>;
-			current-speed = <115200>;
-		};
-
-		CUNIT: cunit@f200 {
-			reg = <0xf200 0x200>;
-		};
-
-		MPSCROUTING: mpscrouting@b400 {
-			reg = <0xb400 0xc>;
-		};
-
-		MPSCINTR: mpscintr@b800 {
-			reg = <0xb800 0x100>;
-			virtual-reg = <0xd800b800>;
-		};
-
-		MPSC0: mpsc@8000 {
-			compatible = "marvell,mv64360-mpsc";
-			reg = <0x8000 0x38>;
-			virtual-reg = <0xd8008000>;
-			sdma = <&SDMA0>;
-			brg = <&BRG0>;
-			cunit = <&CUNIT>;
-			mpscrouting = <&MPSCROUTING>;
-			mpscintr = <&MPSCINTR>;
-			cell-index = <0>;
-			interrupts = <40>;
-			interrupt-parent = <&PIC>;
-		};
-
-		MPSC1: mpsc@9000 {
-			compatible = "marvell,mv64360-mpsc";
-			reg = <0x9000 0x38>;
-			virtual-reg = <0xd8009000>;
-			sdma = <&SDMA1>;
-			brg = <&BRG1>;
-			cunit = <&CUNIT>;
-			mpscrouting = <&MPSCROUTING>;
-			mpscintr = <&MPSCINTR>;
-			cell-index = <1>;
-			interrupts = <42>;
-			interrupt-parent = <&PIC>;
-		};
-
-		wdt@b410 {			/* watchdog timer */
-			compatible = "marvell,mv64360-wdt";
-			reg = <0xb410 0x8>;
-		};
-
-		i2c@c000 {
-			compatible = "marvell,mv64360-i2c";
-			reg = <0xc000 0x20>;
-			virtual-reg = <0xd800c000>;
-			interrupts = <37>;
-			interrupt-parent = <&PIC>;
-		};
-
-		PIC: pic {
-			#interrupt-cells = <1>;
-			#address-cells = <0>;
-			compatible = "marvell,mv64360-pic";
-			reg = <0x0000 0x88>;
-			interrupt-controller;
-		};
-
-		mpp@f000 {
-			compatible = "marvell,mv64360-mpp";
-			reg = <0xf000 0x10>;
-		};
-
-		gpp@f100 {
-			compatible = "marvell,mv64360-gpp";
-			reg = <0xf100 0x20>;
-		};
-
-		PCI0: pci@80000000 {
-			#address-cells = <3>;
-			#size-cells = <2>;
-			#interrupt-cells = <1>;
-			device_type = "pci";
-			compatible = "marvell,mv64360-pci";
-			reg = <0x0cf8 0x8>;
-			ranges = <0x01000000 0x0 0x00000000 0xd4000000 0x0 0x01000000
-				  0x02000000 0x0 0x80000000 0x80000000 0x0 0x08000000>;
-			bus-range = <0 255>;
-			clock-frequency = <66000000>;
-			interrupt-pci-iack = <0x0c34>;
-			interrupt-parent = <&PIC>;
-			interrupt-map-mask = <0x0000 0x0 0x0 0x7>;
-			interrupt-map = <
-				/* Only one interrupt line for PMC0 slot (INTA) */
-				0x0000 0 0 1 &PIC 88
-			>;
-		};
-
-
-		PCI1: pci@a0000000 {
-			#address-cells = <3>;
-			#size-cells = <2>;
-			#interrupt-cells = <1>;
-			device_type = "pci";
-			compatible = "marvell,mv64360-pci";
-			reg = <0x0c78 0x8>;
-			ranges = <0x01000000 0x0 0x00000000 0xd0000000 0x0 0x01000000
-				  0x02000000 0x0 0x80000000 0xa0000000 0x0 0x08000000>;
-			bus-range = <0 255>;
-			clock-frequency = <66000000>;
-			interrupt-pci-iack = <0x0cb4>;
-			interrupt-parent = <&PIC>;
-			interrupt-map-mask = <0xf800 0x00 0x00 0x7>;
-			interrupt-map = <
-				/* IDSEL 0x01: PMC1 ? */
-				0x0800 0 0 1 &PIC 88
-				/* IDSEL 0x02: cPCI bridge */
-				0x1000 0 0 1 &PIC 88
-				/* IDSEL 0x03: USB controller */
-				0x1800 0 0 1 &PIC 91
-				/* IDSEL 0x04: SATA controller */
-				0x2000 0 0 1 &PIC 95
-			>;
-		};
-
-		cpu-error@70 {
-			compatible = "marvell,mv64360-cpu-error";
-			reg = <0x0070 0x10 0x0128 0x28>;
-			interrupts = <3>;
-			interrupt-parent = <&PIC>;
-		};
-
-		sram-ctrl@380 {
-			compatible = "marvell,mv64360-sram-ctrl";
-			reg = <0x0380 0x80>;
-			interrupts = <13>;
-			interrupt-parent = <&PIC>;
-		};
-
-		pci-error@1d40 {
-			compatible = "marvell,mv64360-pci-error";
-			reg = <0x1d40 0x40 0x0c28 0x4>;
-			interrupts = <12>;
-			interrupt-parent = <&PIC>;
-		};
-
-		pci-error@1dc0 {
-			compatible = "marvell,mv64360-pci-error";
-			reg = <0x1dc0 0x40 0x0ca8 0x4>;
-			interrupts = <16>;
-			interrupt-parent = <&PIC>;
-		};
-
-		mem-ctrl@1400 {
-			compatible = "marvell,mv64360-mem-ctrl";
-			reg = <0x1400 0x60>;
-			interrupts = <17>;
-			interrupt-parent = <&PIC>;
-		};
-		/* Devices attached to the device controller */
-		devicebus@45c {
-			#address-cells = <2>;
-			#size-cells = <1>;
-			compatible = "marvell,mv64306-devctrl";
-			reg = <0x45C 0x88>;
-			interrupts = <1>;
-			interrupt-parent = <&PIC>;
-			ranges = 	<0 0 0xd8100000 0x10000
-					 2 0 0xd8110000 0x10000
-					 4 0 0xf8000000 0x8000000>;
-			fpga@0,0 {
-				compatible = "sbs,fpga-c2k";
-				reg = <0 0 0x10000>;
-			};
-			fpga_usart@2,0 {
-				compatible = "sbs,fpga_usart-c2k";
-				reg = <2 0 0x10000>;
-			};
-			nor_flash@4,0 {
-				compatible = "cfi-flash";
-				reg = <4 0 0x8000000>; /* 128MB */
-				bank-width = <4>;
-				device-width = <1>;
-				#address-cells = <1>;
-				#size-cells = <1>;
-				partition@0 {
-					label = "boot";
-					reg = <0x00000000 0x00080000>;
-				};
-				partition@40000 {
-					label = "kernel";
-					reg = <0x00080000 0x00400000>;
-				};
-				partition@440000 {
-					label = "initrd";
-					reg = <0x00480000 0x00B80000>;
-				};
-				partition@1000000 {
-					label = "rootfs";
-					reg = <0x01000000 0x06800000>;
-				};
-				partition@7800000 {
-					label = "recovery";
-					reg = <0x07800000 0x00800000>;
-					read-only;
-				};
-			};
-		};
-	};
-	chosen {
-		stdout-path = &MPSC0;
-	};
-};
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
deleted file mode 100644
index 6c1196b0f81ef..0000000000000
--- a/arch/powerpc/configs/c2k_defconfig
+++ /dev/null
@@ -1,389 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_EMBEDDED6xx=y
-CONFIG_PPC_C2K=y
-CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
-CONFIG_CPU_FREQ_GOV_POWERSAVE=m
-CONFIG_CPU_FREQ_GOV_ONDEMAND=m
-CONFIG_GEN_RTC=y
-CONFIG_HIGHMEM=y
-CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_BINFMT_MISC=y
-CONFIG_PM=y
-CONFIG_PCI_MSI=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_SHPC=m
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_NET_IPIP=m
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_BRIDGE_NF_EBTABLES=m
-CONFIG_BRIDGE_EBT_BROUTE=m
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
-CONFIG_BRIDGE_EBT_802_3=m
-CONFIG_BRIDGE_EBT_AMONG=m
-CONFIG_BRIDGE_EBT_ARP=m
-CONFIG_BRIDGE_EBT_IP=m
-CONFIG_BRIDGE_EBT_LIMIT=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_PKTTYPE=m
-CONFIG_BRIDGE_EBT_STP=m
-CONFIG_BRIDGE_EBT_VLAN=m
-CONFIG_BRIDGE_EBT_ARPREPLY=m
-CONFIG_BRIDGE_EBT_DNAT=m
-CONFIG_BRIDGE_EBT_MARK_T=m
-CONFIG_BRIDGE_EBT_REDIRECT=m
-CONFIG_BRIDGE_EBT_SNAT=m
-CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_IP_SCTP=m
-CONFIG_ATM=m
-CONFIG_ATM_CLIP=m
-CONFIG_ATM_LANE=m
-CONFIG_ATM_BR2684=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_ATM=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_IND=y
-CONFIG_BT=m
-CONFIG_BT_RFCOMM=m
-CONFIG_BT_RFCOMM_TTY=y
-CONFIG_BT_BNEP=m
-CONFIG_BT_BNEP_MC_FILTER=y
-CONFIG_BT_BNEP_PROTO_FILTER=y
-CONFIG_BT_HIDP=m
-CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
-CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIBCM203X=m
-CONFIG_BT_HCIBFUSB=m
-CONFIG_BT_HCIVHCI=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
-CONFIG_SCSI=m
-CONFIG_BLK_DEV_SD=m
-CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_BLK_DEV_3W_XXXX_RAID=m
-CONFIG_SCSI_3W_9XXX=m
-CONFIG_SCSI_ACARD=m
-CONFIG_SCSI_AACRAID=m
-CONFIG_SCSI_AIC7XXX=m
-CONFIG_AIC7XXX_CMDS_PER_DEVICE=4
-CONFIG_AIC7XXX_RESET_DELAY_MS=15000
-# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
-# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
-CONFIG_SCSI_AIC79XX=m
-CONFIG_AIC79XX_CMDS_PER_DEVICE=4
-CONFIG_AIC79XX_RESET_DELAY_MS=15000
-# CONFIG_AIC79XX_DEBUG_ENABLE is not set
-# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
-CONFIG_SCSI_ARCMSR=m
-CONFIG_MEGARAID_NEWGEN=y
-CONFIG_MEGARAID_MM=m
-CONFIG_MEGARAID_MAILBOX=m
-CONFIG_MEGARAID_SAS=m
-CONFIG_SCSI_GDTH=m
-CONFIG_SCSI_IPS=m
-CONFIG_SCSI_INITIO=m
-CONFIG_SCSI_SYM53C8XX_2=m
-CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_NETDEVICES=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_NETCONSOLE=m
-CONFIG_TUN=m
-# CONFIG_ATM_DRIVERS is not set
-CONFIG_MV643XX_ETH=y
-CONFIG_VITESSE_PHY=y
-CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_UINPUT=m
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_SERIAL_MPSC=y
-CONFIG_SERIAL_MPSC_CONSOLE=y
-CONFIG_NVRAM=m
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=8192
-CONFIG_I2C=m
-CONFIG_I2C_CHARDEV=m
-CONFIG_I2C_MV64XXX=m
-CONFIG_HWMON=m
-CONFIG_SENSORS_ADM1021=m
-CONFIG_SENSORS_ADM1025=m
-CONFIG_SENSORS_ADM1026=m
-CONFIG_SENSORS_ADM1031=m
-CONFIG_SENSORS_DS1621=m
-CONFIG_SENSORS_GL518SM=m
-CONFIG_SENSORS_MAX1619=m
-CONFIG_SENSORS_LM75=m
-CONFIG_SENSORS_LM77=m
-CONFIG_SENSORS_LM78=m
-CONFIG_SENSORS_LM80=m
-CONFIG_SENSORS_LM83=m
-CONFIG_SENSORS_LM85=m
-CONFIG_SENSORS_LM87=m
-CONFIG_SENSORS_LM90=m
-CONFIG_SENSORS_PCF8591=m
-CONFIG_SENSORS_VIA686A=m
-CONFIG_SENSORS_W83781D=m
-CONFIG_SENSORS_W83L785TS=m
-CONFIG_WATCHDOG=y
-CONFIG_SOFT_WATCHDOG=m
-CONFIG_PCIPCWATCHDOG=m
-CONFIG_WDTPCI=m
-CONFIG_USBPCWATCHDOG=m
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_USB=m
-CONFIG_USB_MON=m
-CONFIG_USB_EHCI_HCD=m
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_UHCI_HCD=m
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
-CONFIG_USB_STORAGE=m
-CONFIG_USB_STORAGE_DATAFAB=m
-CONFIG_USB_STORAGE_FREECOM=m
-CONFIG_USB_STORAGE_ISD200=m
-CONFIG_USB_STORAGE_SDDR09=m
-CONFIG_USB_STORAGE_SDDR55=m
-CONFIG_USB_STORAGE_JUMPSHOT=m
-CONFIG_USB_MDC800=m
-CONFIG_USB_MICROTEK=m
-CONFIG_USB_SERIAL=m
-CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_SERIAL_BELKIN=m
-CONFIG_USB_SERIAL_WHITEHEAT=m
-CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
-CONFIG_USB_SERIAL_EMPEG=m
-CONFIG_USB_SERIAL_FTDI_SIO=m
-CONFIG_USB_SERIAL_VISOR=m
-CONFIG_USB_SERIAL_IPAQ=m
-CONFIG_USB_SERIAL_IR=m
-CONFIG_USB_SERIAL_EDGEPORT=m
-CONFIG_USB_SERIAL_EDGEPORT_TI=m
-CONFIG_USB_SERIAL_KEYSPAN_PDA=m
-CONFIG_USB_SERIAL_KEYSPAN=m
-CONFIG_USB_SERIAL_KLSI=m
-CONFIG_USB_SERIAL_KOBIL_SCT=m
-CONFIG_USB_SERIAL_MCT_U232=m
-CONFIG_USB_SERIAL_PL2303=m
-CONFIG_USB_SERIAL_SAFE=m
-CONFIG_USB_SERIAL_SAFE_PADDED=y
-CONFIG_USB_SERIAL_CYBERJACK=m
-CONFIG_USB_SERIAL_XIRCOM=m
-CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_EMI62=m
-CONFIG_USB_RIO500=m
-CONFIG_USB_LEGOTOWER=m
-CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
-CONFIG_USB_TEST=m
-CONFIG_USB_ATM=m
-CONFIG_USB_SPEEDTOUCH=m
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_IPOIB=m
-CONFIG_INFINIBAND_IPOIB_CM=y
-CONFIG_INFINIBAND_SRP=m
-CONFIG_DMADEVICES=y
-CONFIG_EXT4_FS=m
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_QUOTA=y
-CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_JFFS2_FS=y
-CONFIG_CRAMFS=m
-CONFIG_VXFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=m
-CONFIG_DEBUG_INFO=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_DEBUG_HIGHMEM=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 9fb2d5912c5aa..8ea16db5ff48a 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -48,16 +48,6 @@ config PPC_HOLLY
 	  Select PPC_HOLLY if configuring for an IBM 750GX/CL Eval
 	  Board with TSI108/9 bridge (Hickory/Holly)
 
-config PPC_C2K
-	bool "SBS/GEFanuc C2K board"
-	depends on EMBEDDED6xx
-	select MV64X60
-	select NOT_COHERENT_CACHE
-	select MTD_CFI_I4
-	help
-	  This option enables support for the GE Fanuc C2K board (formerly
-	  an SBS board).
-
 config MVME5100
 	bool "Motorola/Emerson MVME5100"
 	depends on EMBEDDED6xx
diff --git a/arch/powerpc/platforms/embedded6xx/Makefile b/arch/powerpc/platforms/embedded6xx/Makefile
index 12154e3257ad2..e656ae9f23c64 100644
--- a/arch/powerpc/platforms/embedded6xx/Makefile
+++ b/arch/powerpc/platforms/embedded6xx/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_MPC7448HPC2)	+= mpc7448_hpc2.o
 obj-$(CONFIG_LINKSTATION)	+= linkstation.o ls_uart.o
 obj-$(CONFIG_STORCENTER)	+= storcenter.o
 obj-$(CONFIG_PPC_HOLLY)		+= holly.o
-obj-$(CONFIG_PPC_C2K)		+= c2k.o
 obj-$(CONFIG_USBGECKO_UDBG)	+= usbgecko_udbg.o
 obj-$(CONFIG_GAMECUBE_COMMON)	+= flipper-pic.o
 obj-$(CONFIG_GAMECUBE)		+= gamecube.o
diff --git a/arch/powerpc/platforms/embedded6xx/c2k.c b/arch/powerpc/platforms/embedded6xx/c2k.c
deleted file mode 100644
index d19e4e759597f..0000000000000
--- a/arch/powerpc/platforms/embedded6xx/c2k.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Board setup routines for the GEFanuc C2K board
- *
- * Author: Remi Machet <rmachet@slac.stanford.edu>
- *
- * Originated from prpmc2800.c
- *
- * 2008 (c) Stanford University
- * 2007 (c) MontaVista, Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-#include <linux/time.h>
-#include <linux/of.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-
-#include <mm/mmu_decl.h>
-
-#include <sysdev/mv64x60.h>
-
-#define MV64x60_MPP_CNTL_0	0x0000
-#define MV64x60_MPP_CNTL_2	0x0008
-
-#define MV64x60_GPP_IO_CNTL	0x0000
-#define MV64x60_GPP_LEVEL_CNTL	0x0010
-#define MV64x60_GPP_VALUE_SET	0x0018
-
-static void __iomem *mv64x60_mpp_reg_base;
-static void __iomem *mv64x60_gpp_reg_base;
-
-static void __init c2k_setup_arch(void)
-{
-	struct device_node *np;
-	phys_addr_t paddr;
-	const unsigned int *reg;
-
-	/*
-	 * ioremap mpp and gpp registers in case they are later
-	 * needed by c2k_reset_board().
-	 */
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-mpp");
-	reg = of_get_property(np, "reg", NULL);
-	paddr = of_translate_address(np, reg);
-	of_node_put(np);
-	mv64x60_mpp_reg_base = ioremap(paddr, reg[1]);
-
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-gpp");
-	reg = of_get_property(np, "reg", NULL);
-	paddr = of_translate_address(np, reg);
-	of_node_put(np);
-	mv64x60_gpp_reg_base = ioremap(paddr, reg[1]);
-
-#ifdef CONFIG_PCI
-	mv64x60_pci_init();
-#endif
-}
-
-static void c2k_reset_board(void)
-{
-	u32 temp;
-
-	local_irq_disable();
-
-	temp = in_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_0);
-	temp &= 0xFFFF0FFF;
-	out_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_0, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL);
-	temp |= 0x00000004;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL);
-	temp |= 0x00000004;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL, temp);
-
-	temp = in_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_2);
-	temp &= 0xFFFF0FFF;
-	out_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_2, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL);
-	temp |= 0x00080000;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL);
-	temp |= 0x00080000;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL, temp);
-
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_VALUE_SET, 0x00080004);
-}
-
-static void __noreturn c2k_restart(char *cmd)
-{
-	c2k_reset_board();
-	msleep(100);
-	panic("restart failed\n");
-}
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define COHERENCY_SETTING "off"
-#else
-#define COHERENCY_SETTING "on"
-#endif
-
-void c2k_show_cpuinfo(struct seq_file *m)
-{
-	seq_printf(m, "Vendor\t\t: GEFanuc\n");
-	seq_printf(m, "coherency\t: %s\n", COHERENCY_SETTING);
-}
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init c2k_probe(void)
-{
-	if (!of_machine_is_compatible("GEFanuc,C2K"))
-		return 0;
-
-	printk(KERN_INFO "Detected a GEFanuc C2K board\n");
-
-	_set_L2CR(0);
-	_set_L2CR(L2CR_L2E | L2CR_L2PE | L2CR_L2I);
-
-	mv64x60_init_early();
-
-	return 1;
-}
-
-define_machine(c2k) {
-	.name			= "C2K",
-	.probe			= c2k_probe,
-	.setup_arch		= c2k_setup_arch,
-	.show_cpuinfo		= c2k_show_cpuinfo,
-	.init_IRQ		= mv64x60_init_irq,
-	.get_irq		= mv64x60_get_irq,
-	.restart		= c2k_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-};

From 30f4bbe0472a9644d79a1624fde7f7ba4e44df68 Mon Sep 17 00:00:00 2001
From: Mark Greer <mgreer@animalcreek.com>
Date: Thu, 5 Apr 2018 18:17:17 -0700
Subject: [PATCH 240/251] powerpc/boot: Remove support for Marvell MPSC serial
 controller

There are no longer any platforms that use Marvell's MPSC serial
controller so remove its driver.

Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/Makefile |   2 +-
 arch/powerpc/boot/mpsc.c   | 169 -------------------------------------
 arch/powerpc/boot/ops.h    |   1 -
 arch/powerpc/boot/serial.c |   4 -
 4 files changed, 1 insertion(+), 175 deletions(-)
 delete mode 100644 arch/powerpc/boot/mpsc.c

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index b5620ef685577..523ad27e2d402 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -126,7 +126,7 @@ src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
 src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
 src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
-src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
 src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c
 src-wlib-$(CONFIG_CPM) += cpm-serial.c
 
diff --git a/arch/powerpc/boot/mpsc.c b/arch/powerpc/boot/mpsc.c
deleted file mode 100644
index 425ad88cce8d9..0000000000000
--- a/arch/powerpc/boot/mpsc.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * MPSC/UART driver for the Marvell mv64360, mv64460, ...
- *
- * Author: Mark A. Greer <mgreer@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-
-
-#define MPSC_CHR_1		0x000c
-
-#define MPSC_CHR_2		0x0010
-#define MPSC_CHR_2_TA		(1<<7)
-#define MPSC_CHR_2_TCS		(1<<9)
-#define MPSC_CHR_2_RA		(1<<23)
-#define MPSC_CHR_2_CRD		(1<<25)
-#define MPSC_CHR_2_EH		(1<<31)
-
-#define MPSC_CHR_4		0x0018
-#define MPSC_CHR_4_Z		(1<<29)
-
-#define MPSC_CHR_5		0x001c
-#define MPSC_CHR_5_CTL1_INTR	(1<<12)
-#define MPSC_CHR_5_CTL1_VALID	(1<<15)
-
-#define MPSC_CHR_10		0x0030
-
-#define MPSC_INTR_CAUSE		0x0000
-#define MPSC_INTR_CAUSE_RCC	(1<<6)
-#define MPSC_INTR_MASK		0x0080
-
-#define SDMA_SDCM		0x0008
-#define SDMA_SDCM_AR		(1<<15)
-#define SDMA_SDCM_AT		(1<<31)
-
-static volatile char *mpsc_base;
-static volatile char *mpscintr_base;
-static u32 chr1, chr2;
-
-static int mpsc_open(void)
-{
-	chr1 = in_le32((u32 *)(mpsc_base + MPSC_CHR_1)) & 0x00ff0000;
-	chr2 = in_le32((u32 *)(mpsc_base + MPSC_CHR_2)) & ~(MPSC_CHR_2_TA
-			| MPSC_CHR_2_TCS | MPSC_CHR_2_RA | MPSC_CHR_2_CRD
-			| MPSC_CHR_2_EH);
-	out_le32((u32 *)(mpsc_base + MPSC_CHR_4), MPSC_CHR_4_Z);
-	out_le32((u32 *)(mpsc_base + MPSC_CHR_5),
-			MPSC_CHR_5_CTL1_INTR | MPSC_CHR_5_CTL1_VALID);
-	out_le32((u32 *)(mpsc_base + MPSC_CHR_2), chr2 | MPSC_CHR_2_EH);
-	return 0;
-}
-
-static void mpsc_putc(unsigned char c)
-{
-	while (in_le32((u32 *)(mpsc_base + MPSC_CHR_2)) & MPSC_CHR_2_TCS);
-
-	out_le32((u32 *)(mpsc_base + MPSC_CHR_1), chr1 | c);
-	out_le32((u32 *)(mpsc_base + MPSC_CHR_2), chr2 | MPSC_CHR_2_TCS);
-}
-
-static unsigned char mpsc_getc(void)
-{
-	u32 cause = 0;
-	unsigned char c;
-
-	while (!(cause & MPSC_INTR_CAUSE_RCC))
-		cause = in_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE));
-
-	c = in_8((u8 *)(mpsc_base + MPSC_CHR_10 + 2));
-	out_8((u8 *)(mpsc_base + MPSC_CHR_10 + 2), c);
-	out_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE),
-			cause & ~MPSC_INTR_CAUSE_RCC);
-
-	return c;
-}
-
-static u8 mpsc_tstc(void)
-{
-	return (u8)((in_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE))
-				& MPSC_INTR_CAUSE_RCC) != 0);
-}
-
-static void mpsc_stop_dma(volatile char *sdma_base)
-{
-	out_le32((u32 *)(mpsc_base + MPSC_CHR_2),MPSC_CHR_2_TA | MPSC_CHR_2_RA);
-	out_le32((u32 *)(sdma_base + SDMA_SDCM), SDMA_SDCM_AR | SDMA_SDCM_AT);
-
-	while ((in_le32((u32 *)(sdma_base + SDMA_SDCM))
-				& (SDMA_SDCM_AR | SDMA_SDCM_AT)) != 0)
-		udelay(100);
-}
-
-static volatile char *mpsc_get_virtreg_of_phandle(void *devp, char *prop)
-{
-	void *v;
-	int n;
-
-	n = getprop(devp, prop, &v, sizeof(v));
-	if (n != sizeof(v))
-		goto err_out;
-
-	devp = find_node_by_linuxphandle((u32)v);
-	if (devp == NULL)
-		goto err_out;
-
-	n = getprop(devp, "virtual-reg", &v, sizeof(v));
-	if (n == sizeof(v))
-		return v;
-
-err_out:
-	return NULL;
-}
-
-int mpsc_console_init(void *devp, struct serial_console_data *scdp)
-{
-	void *v;
-	int n, reg_set;
-	volatile char *sdma_base;
-
-	n = getprop(devp, "virtual-reg", &v, sizeof(v));
-	if (n != sizeof(v))
-		goto err_out;
-	mpsc_base = v;
-
-	sdma_base = mpsc_get_virtreg_of_phandle(devp, "sdma");
-	if (sdma_base == NULL)
-		goto err_out;
-
-	mpscintr_base = mpsc_get_virtreg_of_phandle(devp, "mpscintr");
-	if (mpscintr_base == NULL)
-		goto err_out;
-
-	n = getprop(devp, "cell-index", &v, sizeof(v));
-	if (n != sizeof(v))
-		goto err_out;
-	reg_set = (int)v;
-
-	mpscintr_base += (reg_set == 0) ? 0x4 : 0xc;
-
-	/* Make sure the mpsc ctlrs are shutdown */
-	out_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE), 0);
-	out_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE), 0);
-	out_le32((u32 *)(mpscintr_base + MPSC_INTR_MASK), 0);
-	out_le32((u32 *)(mpscintr_base + MPSC_INTR_MASK), 0);
-
-	mpsc_stop_dma(sdma_base);
-
-	scdp->open = mpsc_open;
-	scdp->putc = mpsc_putc;
-	scdp->getc = mpsc_getc;
-	scdp->tstc = mpsc_tstc;
-	scdp->close = NULL;
-
-	return 0;
-
-err_out:
-	return -1;
-}
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index fad1862f4b2d3..cd043726ed88c 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -86,7 +86,6 @@ void start(void);
 void fdt_init(void *blob);
 int serial_console_init(void);
 int ns16550_console_init(void *devp, struct serial_console_data *scdp);
-int mpsc_console_init(void *devp, struct serial_console_data *scdp);
 int cpm_console_init(void *devp, struct serial_console_data *scdp);
 int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp);
 int uartlite_console_init(void *devp, struct serial_console_data *scdp);
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 88955095ec07d..48e3743faedfd 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -120,10 +120,6 @@ int serial_console_init(void)
 	if (dt_is_compatible(devp, "ns16550") ||
 	    dt_is_compatible(devp, "pnpPNP,501"))
 		rc = ns16550_console_init(devp, &serial_cd);
-#ifdef CONFIG_EMBEDDED6xx
-	else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
-		rc = mpsc_console_init(devp, &serial_cd);
-#endif
 #ifdef CONFIG_CPM
 	else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
 	         dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||

From 6bf17a83e6e5980313f88ce3ed937894d9a7a956 Mon Sep 17 00:00:00 2001
From: Mark Greer <mgreer@animalcreek.com>
Date: Thu, 5 Apr 2018 18:17:18 -0700
Subject: [PATCH 241/251] powerpc/boot: Remove support for Marvell mv64x60 i2c
 controller

There are no longer any platforms that use Marvell's mv64x60's i2c
controller so remove its driver.

Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Acked-by: Dale Farnsworth <dale@farnsworth.org>
Signed-off-by: Mark Greer &lt;<a href="mailto:mgreer@animalcreek.com">mgreer@animalcreek.com</a>&gt;<br>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/Makefile      |   2 +-
 arch/powerpc/boot/mv64x60_i2c.c | 204 --------------------------------
 2 files changed, 1 insertion(+), 205 deletions(-)
 delete mode 100644 arch/powerpc/boot/mv64x60_i2c.c

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 523ad27e2d402..50acbf67b2642 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -126,7 +126,7 @@ src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
 src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
 src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
-src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c ugecon.c fsl-soc.c
 src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c
 src-wlib-$(CONFIG_CPM) += cpm-serial.c
 
diff --git a/arch/powerpc/boot/mv64x60_i2c.c b/arch/powerpc/boot/mv64x60_i2c.c
deleted file mode 100644
index 52a3212b6638b..0000000000000
--- a/arch/powerpc/boot/mv64x60_i2c.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Bootloader version of the i2c driver for the MV64x60.
- *
- * Author: Dale Farnsworth <dfarnsworth@mvista.com>
- * Maintained by: Mark A. Greer <mgreer@mvista.com>
- *
- * 2003, 2007 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program is
- * licensed "as is" without any warranty of any kind, whether express or
- * implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "elf.h"
-#include "page.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "mv64x60.h"
-
-/* Register defines */
-#define MV64x60_I2C_REG_SLAVE_ADDR			0x00
-#define MV64x60_I2C_REG_DATA				0x04
-#define MV64x60_I2C_REG_CONTROL				0x08
-#define MV64x60_I2C_REG_STATUS				0x0c
-#define MV64x60_I2C_REG_BAUD				0x0c
-#define MV64x60_I2C_REG_EXT_SLAVE_ADDR			0x10
-#define MV64x60_I2C_REG_SOFT_RESET			0x1c
-
-#define MV64x60_I2C_CONTROL_ACK				0x04
-#define MV64x60_I2C_CONTROL_IFLG			0x08
-#define MV64x60_I2C_CONTROL_STOP			0x10
-#define MV64x60_I2C_CONTROL_START			0x20
-#define MV64x60_I2C_CONTROL_TWSIEN			0x40
-#define MV64x60_I2C_CONTROL_INTEN			0x80
-
-#define MV64x60_I2C_STATUS_BUS_ERR			0x00
-#define MV64x60_I2C_STATUS_MAST_START			0x08
-#define MV64x60_I2C_STATUS_MAST_REPEAT_START		0x10
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_ACK		0x18
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_NO_ACK		0x20
-#define MV64x60_I2C_STATUS_MAST_WR_ACK			0x28
-#define MV64x60_I2C_STATUS_MAST_WR_NO_ACK		0x30
-#define MV64x60_I2C_STATUS_MAST_LOST_ARB		0x38
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_ACK		0x40
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_NO_ACK		0x48
-#define MV64x60_I2C_STATUS_MAST_RD_DATA_ACK		0x50
-#define MV64x60_I2C_STATUS_MAST_RD_DATA_NO_ACK		0x58
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_2_ACK		0xd0
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_2_NO_ACK	0xd8
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_2_ACK		0xe0
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_2_NO_ACK	0xe8
-#define MV64x60_I2C_STATUS_NO_STATUS			0xf8
-
-static u8 *ctlr_base;
-
-static int mv64x60_i2c_wait_for_status(int wanted)
-{
-	int i;
-	int status;
-
-	for (i=0; i<1000; i++) {
-		udelay(10);
-		status = in_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_STATUS))
-			& 0xff;
-		if (status == wanted)
-			return status;
-	}
-	return -status;
-}
-
-static int mv64x60_i2c_control(int control, int status)
-{
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_CONTROL), control & 0xff);
-	return mv64x60_i2c_wait_for_status(status);
-}
-
-static int mv64x60_i2c_read_byte(int control, int status)
-{
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_CONTROL), control & 0xff);
-	if (mv64x60_i2c_wait_for_status(status) < 0)
-		return -1;
-	return in_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_DATA)) & 0xff;
-}
-
-static int mv64x60_i2c_write_byte(int data, int control, int status)
-{
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_DATA), data & 0xff);
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_CONTROL), control & 0xff);
-	return mv64x60_i2c_wait_for_status(status);
-}
-
-int mv64x60_i2c_read(u32 devaddr, u8 *buf, u32 offset, u32 offset_size,
-		 u32 count)
-{
-	int i;
-	int data;
-	int control;
-	int status;
-
-	if (ctlr_base == NULL)
-		return -1;
-
-	/* send reset */
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_SOFT_RESET), 0);
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_SLAVE_ADDR), 0);
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_EXT_SLAVE_ADDR), 0);
-	out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_BAUD), (4 << 3) | 0x4);
-
-	if (mv64x60_i2c_control(MV64x60_I2C_CONTROL_TWSIEN,
-				MV64x60_I2C_STATUS_NO_STATUS) < 0)
-		return -1;
-
-	/* send start */
-	control = MV64x60_I2C_CONTROL_START | MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_START;
-	if (mv64x60_i2c_control(control, status) < 0)
-		return -1;
-
-	/* select device for writing */
-	data = devaddr & ~0x1;
-	control = MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_WR_ADDR_ACK;
-	if (mv64x60_i2c_write_byte(data, control, status) < 0)
-		return -1;
-
-	/* send offset of data */
-	control = MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_WR_ACK;
-	if (offset_size > 1) {
-		if (mv64x60_i2c_write_byte(offset >> 8, control, status) < 0)
-			return -1;
-	}
-	if (mv64x60_i2c_write_byte(offset, control, status) < 0)
-		return -1;
-
-	/* resend start */
-	control = MV64x60_I2C_CONTROL_START | MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_REPEAT_START;
-	if (mv64x60_i2c_control(control, status) < 0)
-		return -1;
-
-	/* select device for reading */
-	data = devaddr | 0x1;
-	control = MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_RD_ADDR_ACK;
-	if (mv64x60_i2c_write_byte(data, control, status) < 0)
-		return -1;
-
-	/* read all but last byte of data */
-	control = MV64x60_I2C_CONTROL_ACK | MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_RD_DATA_ACK;
-
-	for (i=1; i<count; i++) {
-		data = mv64x60_i2c_read_byte(control, status);
-		if (data < 0) {
-			printf("errors on iteration %d\n", i);
-			return -1;
-		}
-		*buf++ = data;
-	}
-
-	/* read last byte of data */
-	control = MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_MAST_RD_DATA_NO_ACK;
-	data = mv64x60_i2c_read_byte(control, status);
-	if (data < 0)
-		return -1;
-	*buf++ = data;
-
-	/* send stop */
-	control = MV64x60_I2C_CONTROL_STOP | MV64x60_I2C_CONTROL_TWSIEN;
-	status = MV64x60_I2C_STATUS_NO_STATUS;
-	if (mv64x60_i2c_control(control, status) < 0)
-		return -1;
-
-	return count;
-}
-
-int mv64x60_i2c_open(void)
-{
-	u32 v;
-	void *devp;
-
-	devp = find_node_by_compatible(NULL, "marvell,mv64360-i2c");
-	if (devp == NULL)
-		goto err_out;
-	if (getprop(devp, "virtual-reg", &v, sizeof(v)) != sizeof(v))
-		goto err_out;
-
-	ctlr_base = (u8 *)v;
-	return 0;
-
-err_out:
-	return -1;
-}
-
-void mv64x60_i2c_close(void)
-{
-	ctlr_base = NULL;
-}

From 297f83140180b46e8bc7698d7caca560088eaaec Mon Sep 17 00:00:00 2001
From: Mark Greer <mgreer@animalcreek.com>
Date: Thu, 5 Apr 2018 18:17:19 -0700
Subject: [PATCH 242/251] powerpc/boot: Remove core support for Marvell mv64x60
 hostbridges

There are no longer any platforms that use Marvell's mv64x60
hostbridges so remove the supporting boot code.

Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/Makefile  |   2 +-
 arch/powerpc/boot/mv64x60.c | 581 ------------------------------------
 arch/powerpc/boot/mv64x60.h |  70 -----
 3 files changed, 1 insertion(+), 652 deletions(-)
 delete mode 100644 arch/powerpc/boot/mv64x60.c
 delete mode 100644 arch/powerpc/boot/mv64x60.h

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 50acbf67b2642..deea20c334df4 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -126,7 +126,7 @@ src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
 src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
 src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
-src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += ugecon.c fsl-soc.c
 src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c
 src-wlib-$(CONFIG_CPM) += cpm-serial.c
 
diff --git a/arch/powerpc/boot/mv64x60.c b/arch/powerpc/boot/mv64x60.c
deleted file mode 100644
index d9bb302b91d28..0000000000000
--- a/arch/powerpc/boot/mv64x60.c
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * Marvell hostbridge routines
- *
- * Author: Mark A. Greer <source@mvista.com>
- *
- * 2004, 2005, 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "elf.h"
-#include "page.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "mv64x60.h"
-
-#define PCI_DEVFN(slot,func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
-
-#define MV64x60_CPU2MEM_WINDOWS			4
-#define MV64x60_CPU2MEM_0_BASE			0x0008
-#define MV64x60_CPU2MEM_0_SIZE			0x0010
-#define MV64x60_CPU2MEM_1_BASE			0x0208
-#define MV64x60_CPU2MEM_1_SIZE			0x0210
-#define MV64x60_CPU2MEM_2_BASE			0x0018
-#define MV64x60_CPU2MEM_2_SIZE			0x0020
-#define MV64x60_CPU2MEM_3_BASE			0x0218
-#define MV64x60_CPU2MEM_3_SIZE			0x0220
-
-#define MV64x60_ENET2MEM_BAR_ENABLE		0x2290
-#define MV64x60_ENET2MEM_0_BASE			0x2200
-#define MV64x60_ENET2MEM_0_SIZE			0x2204
-#define MV64x60_ENET2MEM_1_BASE			0x2208
-#define MV64x60_ENET2MEM_1_SIZE			0x220c
-#define MV64x60_ENET2MEM_2_BASE			0x2210
-#define MV64x60_ENET2MEM_2_SIZE			0x2214
-#define MV64x60_ENET2MEM_3_BASE			0x2218
-#define MV64x60_ENET2MEM_3_SIZE			0x221c
-#define MV64x60_ENET2MEM_4_BASE			0x2220
-#define MV64x60_ENET2MEM_4_SIZE			0x2224
-#define MV64x60_ENET2MEM_5_BASE			0x2228
-#define MV64x60_ENET2MEM_5_SIZE			0x222c
-#define MV64x60_ENET2MEM_ACC_PROT_0		0x2294
-#define MV64x60_ENET2MEM_ACC_PROT_1		0x2298
-#define MV64x60_ENET2MEM_ACC_PROT_2		0x229c
-
-#define MV64x60_MPSC2MEM_BAR_ENABLE		0xf250
-#define MV64x60_MPSC2MEM_0_BASE			0xf200
-#define MV64x60_MPSC2MEM_0_SIZE			0xf204
-#define MV64x60_MPSC2MEM_1_BASE			0xf208
-#define MV64x60_MPSC2MEM_1_SIZE			0xf20c
-#define MV64x60_MPSC2MEM_2_BASE			0xf210
-#define MV64x60_MPSC2MEM_2_SIZE			0xf214
-#define MV64x60_MPSC2MEM_3_BASE			0xf218
-#define MV64x60_MPSC2MEM_3_SIZE			0xf21c
-#define MV64x60_MPSC_0_REMAP			0xf240
-#define MV64x60_MPSC_1_REMAP			0xf244
-#define MV64x60_MPSC2MEM_ACC_PROT_0		0xf254
-#define MV64x60_MPSC2MEM_ACC_PROT_1		0xf258
-#define MV64x60_MPSC2REGS_BASE			0xf25c
-
-#define MV64x60_IDMA2MEM_BAR_ENABLE		0x0a80
-#define MV64x60_IDMA2MEM_0_BASE			0x0a00
-#define MV64x60_IDMA2MEM_0_SIZE			0x0a04
-#define MV64x60_IDMA2MEM_1_BASE			0x0a08
-#define MV64x60_IDMA2MEM_1_SIZE			0x0a0c
-#define MV64x60_IDMA2MEM_2_BASE			0x0a10
-#define MV64x60_IDMA2MEM_2_SIZE			0x0a14
-#define MV64x60_IDMA2MEM_3_BASE			0x0a18
-#define MV64x60_IDMA2MEM_3_SIZE			0x0a1c
-#define MV64x60_IDMA2MEM_4_BASE			0x0a20
-#define MV64x60_IDMA2MEM_4_SIZE			0x0a24
-#define MV64x60_IDMA2MEM_5_BASE			0x0a28
-#define MV64x60_IDMA2MEM_5_SIZE			0x0a2c
-#define MV64x60_IDMA2MEM_6_BASE			0x0a30
-#define MV64x60_IDMA2MEM_6_SIZE			0x0a34
-#define MV64x60_IDMA2MEM_7_BASE			0x0a38
-#define MV64x60_IDMA2MEM_7_SIZE			0x0a3c
-#define MV64x60_IDMA2MEM_ACC_PROT_0		0x0a70
-#define MV64x60_IDMA2MEM_ACC_PROT_1		0x0a74
-#define MV64x60_IDMA2MEM_ACC_PROT_2		0x0a78
-#define MV64x60_IDMA2MEM_ACC_PROT_3		0x0a7c
-
-#define MV64x60_PCI_ACC_CNTL_WINDOWS		6
-#define MV64x60_PCI0_PCI_DECODE_CNTL		0x0d3c
-#define MV64x60_PCI1_PCI_DECODE_CNTL		0x0dbc
-
-#define MV64x60_PCI0_BAR_ENABLE			0x0c3c
-#define MV64x60_PCI02MEM_0_SIZE			0x0c08
-#define MV64x60_PCI0_ACC_CNTL_0_BASE_LO		0x1e00
-#define MV64x60_PCI0_ACC_CNTL_0_BASE_HI		0x1e04
-#define MV64x60_PCI0_ACC_CNTL_0_SIZE		0x1e08
-#define MV64x60_PCI0_ACC_CNTL_1_BASE_LO		0x1e10
-#define MV64x60_PCI0_ACC_CNTL_1_BASE_HI		0x1e14
-#define MV64x60_PCI0_ACC_CNTL_1_SIZE		0x1e18
-#define MV64x60_PCI0_ACC_CNTL_2_BASE_LO		0x1e20
-#define MV64x60_PCI0_ACC_CNTL_2_BASE_HI		0x1e24
-#define MV64x60_PCI0_ACC_CNTL_2_SIZE		0x1e28
-#define MV64x60_PCI0_ACC_CNTL_3_BASE_LO		0x1e30
-#define MV64x60_PCI0_ACC_CNTL_3_BASE_HI		0x1e34
-#define MV64x60_PCI0_ACC_CNTL_3_SIZE		0x1e38
-#define MV64x60_PCI0_ACC_CNTL_4_BASE_LO		0x1e40
-#define MV64x60_PCI0_ACC_CNTL_4_BASE_HI		0x1e44
-#define MV64x60_PCI0_ACC_CNTL_4_SIZE		0x1e48
-#define MV64x60_PCI0_ACC_CNTL_5_BASE_LO		0x1e50
-#define MV64x60_PCI0_ACC_CNTL_5_BASE_HI		0x1e54
-#define MV64x60_PCI0_ACC_CNTL_5_SIZE		0x1e58
-
-#define MV64x60_PCI1_BAR_ENABLE			0x0cbc
-#define MV64x60_PCI12MEM_0_SIZE			0x0c88
-#define MV64x60_PCI1_ACC_CNTL_0_BASE_LO		0x1e80
-#define MV64x60_PCI1_ACC_CNTL_0_BASE_HI		0x1e84
-#define MV64x60_PCI1_ACC_CNTL_0_SIZE		0x1e88
-#define MV64x60_PCI1_ACC_CNTL_1_BASE_LO		0x1e90
-#define MV64x60_PCI1_ACC_CNTL_1_BASE_HI		0x1e94
-#define MV64x60_PCI1_ACC_CNTL_1_SIZE		0x1e98
-#define MV64x60_PCI1_ACC_CNTL_2_BASE_LO		0x1ea0
-#define MV64x60_PCI1_ACC_CNTL_2_BASE_HI		0x1ea4
-#define MV64x60_PCI1_ACC_CNTL_2_SIZE		0x1ea8
-#define MV64x60_PCI1_ACC_CNTL_3_BASE_LO		0x1eb0
-#define MV64x60_PCI1_ACC_CNTL_3_BASE_HI		0x1eb4
-#define MV64x60_PCI1_ACC_CNTL_3_SIZE		0x1eb8
-#define MV64x60_PCI1_ACC_CNTL_4_BASE_LO		0x1ec0
-#define MV64x60_PCI1_ACC_CNTL_4_BASE_HI		0x1ec4
-#define MV64x60_PCI1_ACC_CNTL_4_SIZE		0x1ec8
-#define MV64x60_PCI1_ACC_CNTL_5_BASE_LO		0x1ed0
-#define MV64x60_PCI1_ACC_CNTL_5_BASE_HI		0x1ed4
-#define MV64x60_PCI1_ACC_CNTL_5_SIZE		0x1ed8
-
-#define MV64x60_CPU2PCI_SWAP_NONE		0x01000000
-
-#define MV64x60_CPU2PCI0_IO_BASE		0x0048
-#define MV64x60_CPU2PCI0_IO_SIZE		0x0050
-#define MV64x60_CPU2PCI0_IO_REMAP		0x00f0
-#define MV64x60_CPU2PCI0_MEM_0_BASE		0x0058
-#define MV64x60_CPU2PCI0_MEM_0_SIZE		0x0060
-#define MV64x60_CPU2PCI0_MEM_0_REMAP_LO		0x00f8
-#define MV64x60_CPU2PCI0_MEM_0_REMAP_HI		0x0320
-
-#define MV64x60_CPU2PCI1_IO_BASE		0x0090
-#define MV64x60_CPU2PCI1_IO_SIZE		0x0098
-#define MV64x60_CPU2PCI1_IO_REMAP		0x0108
-#define MV64x60_CPU2PCI1_MEM_0_BASE		0x00a0
-#define MV64x60_CPU2PCI1_MEM_0_SIZE		0x00a8
-#define MV64x60_CPU2PCI1_MEM_0_REMAP_LO		0x0110
-#define MV64x60_CPU2PCI1_MEM_0_REMAP_HI		0x0340
-
-struct mv64x60_mem_win {
-	u32 hi;
-	u32 lo;
-	u32 size;
-};
-
-struct mv64x60_pci_win {
-	u32 fcn;
-	u32 hi;
-	u32 lo;
-	u32 size;
-};
-
-/* PCI config access routines */
-struct {
-	u32 addr;
-	u32 data;
-} static mv64x60_pci_cfgio[2] = {
-	{ /* hose 0 */
-		.addr	= 0xcf8,
-		.data	= 0xcfc,
-	},
-	{ /* hose 1 */
-		.addr	= 0xc78,
-		.data	= 0xc7c,
-	}
-};
-
-u32 mv64x60_cfg_read(u8 *bridge_base, u8 hose, u8 bus, u8 devfn, u8 offset)
-{
-	out_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].addr),
-			(1 << 31) | (bus << 16) | (devfn << 8) | offset);
-	return in_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].data));
-}
-
-void mv64x60_cfg_write(u8 *bridge_base, u8 hose, u8 bus, u8 devfn, u8 offset,
-		u32 val)
-{
-	out_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].addr),
-			(1 << 31) | (bus << 16) | (devfn << 8) | offset);
-	out_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].data), val);
-}
-
-/* I/O ctlr -> system memory setup */
-static struct mv64x60_mem_win mv64x60_cpu2mem[MV64x60_CPU2MEM_WINDOWS] = {
-	{
-		.lo	= MV64x60_CPU2MEM_0_BASE,
-		.size	= MV64x60_CPU2MEM_0_SIZE,
-	},
-	{
-		.lo	= MV64x60_CPU2MEM_1_BASE,
-		.size	= MV64x60_CPU2MEM_1_SIZE,
-	},
-	{
-		.lo	= MV64x60_CPU2MEM_2_BASE,
-		.size	= MV64x60_CPU2MEM_2_SIZE,
-	},
-	{
-		.lo	= MV64x60_CPU2MEM_3_BASE,
-		.size	= MV64x60_CPU2MEM_3_SIZE,
-	},
-};
-
-static struct mv64x60_mem_win mv64x60_enet2mem[MV64x60_CPU2MEM_WINDOWS] = {
-	{
-		.lo	= MV64x60_ENET2MEM_0_BASE,
-		.size	= MV64x60_ENET2MEM_0_SIZE,
-	},
-	{
-		.lo	= MV64x60_ENET2MEM_1_BASE,
-		.size	= MV64x60_ENET2MEM_1_SIZE,
-	},
-	{
-		.lo	= MV64x60_ENET2MEM_2_BASE,
-		.size	= MV64x60_ENET2MEM_2_SIZE,
-	},
-	{
-		.lo	= MV64x60_ENET2MEM_3_BASE,
-		.size	= MV64x60_ENET2MEM_3_SIZE,
-	},
-};
-
-static struct mv64x60_mem_win mv64x60_mpsc2mem[MV64x60_CPU2MEM_WINDOWS] = {
-	{
-		.lo	= MV64x60_MPSC2MEM_0_BASE,
-		.size	= MV64x60_MPSC2MEM_0_SIZE,
-	},
-	{
-		.lo	= MV64x60_MPSC2MEM_1_BASE,
-		.size	= MV64x60_MPSC2MEM_1_SIZE,
-	},
-	{
-		.lo	= MV64x60_MPSC2MEM_2_BASE,
-		.size	= MV64x60_MPSC2MEM_2_SIZE,
-	},
-	{
-		.lo	= MV64x60_MPSC2MEM_3_BASE,
-		.size	= MV64x60_MPSC2MEM_3_SIZE,
-	},
-};
-
-static struct mv64x60_mem_win mv64x60_idma2mem[MV64x60_CPU2MEM_WINDOWS] = {
-	{
-		.lo	= MV64x60_IDMA2MEM_0_BASE,
-		.size	= MV64x60_IDMA2MEM_0_SIZE,
-	},
-	{
-		.lo	= MV64x60_IDMA2MEM_1_BASE,
-		.size	= MV64x60_IDMA2MEM_1_SIZE,
-	},
-	{
-		.lo	= MV64x60_IDMA2MEM_2_BASE,
-		.size	= MV64x60_IDMA2MEM_2_SIZE,
-	},
-	{
-		.lo	= MV64x60_IDMA2MEM_3_BASE,
-		.size	= MV64x60_IDMA2MEM_3_SIZE,
-	},
-};
-
-static u32 mv64x60_dram_selects[MV64x60_CPU2MEM_WINDOWS] = {0xe,0xd,0xb,0x7};
-
-/*
- * ENET, MPSC, and IDMA ctlrs on the MV64x60 have separate windows that
- * must be set up so that the respective ctlr can access system memory.
- * Configure them to be same as cpu->memory windows.
- */
-void mv64x60_config_ctlr_windows(u8 *bridge_base, u8 *bridge_pbase,
-		u8 is_coherent)
-{
-	u32 i, base, size, enables, prot = 0, snoop_bits = 0;
-
-	/* Disable ctlr->mem windows */
-	out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_BAR_ENABLE), 0x3f);
-	out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_BAR_ENABLE), 0xf);
-	out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_BAR_ENABLE), 0xff);
-
-	if (is_coherent)
-		snoop_bits = 0x2 << 12; /* Writeback */
-
-	enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE)) & 0xf;
-
-	for (i=0; i<MV64x60_CPU2MEM_WINDOWS; i++) {
-		if (enables & (1 << i)) /* Set means disabled */
-			continue;
-
-		base = in_le32((u32 *)(bridge_base + mv64x60_cpu2mem[i].lo))
-			<< 16;
-		base |= snoop_bits | (mv64x60_dram_selects[i] << 8);
-		size = in_le32((u32 *)(bridge_base + mv64x60_cpu2mem[i].size))
-			<< 16;
-		prot |= (0x3 << (i << 1)); /* RW access */
-
-		out_le32((u32 *)(bridge_base + mv64x60_enet2mem[i].lo), base);
-		out_le32((u32 *)(bridge_base + mv64x60_enet2mem[i].size), size);
-		out_le32((u32 *)(bridge_base + mv64x60_mpsc2mem[i].lo), base);
-		out_le32((u32 *)(bridge_base + mv64x60_mpsc2mem[i].size), size);
-		out_le32((u32 *)(bridge_base + mv64x60_idma2mem[i].lo), base);
-		out_le32((u32 *)(bridge_base + mv64x60_idma2mem[i].size), size);
-	}
-
-	out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_ACC_PROT_0), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_ACC_PROT_1), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_ACC_PROT_2), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_ACC_PROT_0), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_ACC_PROT_1), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_0), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_1), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_2), prot);
-	out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_3), prot);
-
-	/* Set mpsc->bridge's reg window to the bridge's internal registers. */
-	out_le32((u32 *)(bridge_base + MV64x60_MPSC2REGS_BASE),
-			(u32)bridge_pbase);
-
-	out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_BAR_ENABLE), enables);
-	out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_BAR_ENABLE), enables);
-	out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_BAR_ENABLE), enables);
-}
-
-/* PCI MEM -> system memory, et. al. setup */
-static struct mv64x60_pci_win mv64x60_pci2mem[2] = {
-	{ /* hose 0 */
-		.fcn	= 0,
-		.hi	= 0x14,
-		.lo	= 0x10,
-		.size	= MV64x60_PCI02MEM_0_SIZE,
-	},
-	{ /* hose 1 */
-		.fcn	= 0,
-		.hi	= 0x94,
-		.lo	= 0x90,
-		.size	= MV64x60_PCI12MEM_0_SIZE,
-	},
-};
-
-static struct
-mv64x60_mem_win mv64x60_pci_acc[2][MV64x60_PCI_ACC_CNTL_WINDOWS] = {
-	{ /* hose 0 */
-		{
-			.hi	= MV64x60_PCI0_ACC_CNTL_0_BASE_HI,
-			.lo	= MV64x60_PCI0_ACC_CNTL_0_BASE_LO,
-			.size	= MV64x60_PCI0_ACC_CNTL_0_SIZE,
-		},
-		{
-			.hi	= MV64x60_PCI0_ACC_CNTL_1_BASE_HI,
-			.lo	= MV64x60_PCI0_ACC_CNTL_1_BASE_LO,
-			.size	= MV64x60_PCI0_ACC_CNTL_1_SIZE,
-		},
-		{
-			.hi	= MV64x60_PCI0_ACC_CNTL_2_BASE_HI,
-			.lo	= MV64x60_PCI0_ACC_CNTL_2_BASE_LO,
-			.size	= MV64x60_PCI0_ACC_CNTL_2_SIZE,
-		},
-		{
-			.hi	= MV64x60_PCI0_ACC_CNTL_3_BASE_HI,
-			.lo	= MV64x60_PCI0_ACC_CNTL_3_BASE_LO,
-			.size	= MV64x60_PCI0_ACC_CNTL_3_SIZE,
-		},
-	},
-	{ /* hose 1 */
-		{
-			.hi	= MV64x60_PCI1_ACC_CNTL_0_BASE_HI,
-			.lo	= MV64x60_PCI1_ACC_CNTL_0_BASE_LO,
-			.size	= MV64x60_PCI1_ACC_CNTL_0_SIZE,
-		},
-		{
-			.hi	= MV64x60_PCI1_ACC_CNTL_1_BASE_HI,
-			.lo	= MV64x60_PCI1_ACC_CNTL_1_BASE_LO,
-			.size	= MV64x60_PCI1_ACC_CNTL_1_SIZE,
-		},
-		{
-			.hi	= MV64x60_PCI1_ACC_CNTL_2_BASE_HI,
-			.lo	= MV64x60_PCI1_ACC_CNTL_2_BASE_LO,
-			.size	= MV64x60_PCI1_ACC_CNTL_2_SIZE,
-		},
-		{
-			.hi	= MV64x60_PCI1_ACC_CNTL_3_BASE_HI,
-			.lo	= MV64x60_PCI1_ACC_CNTL_3_BASE_LO,
-			.size	= MV64x60_PCI1_ACC_CNTL_3_SIZE,
-		},
-	},
-};
-
-static struct mv64x60_mem_win mv64x60_pci2reg[2] = {
-	{
-		.hi	= 0x24,
-		.lo	= 0x20,
-		.size	= 0,
-	},
-	{
-		.hi	= 0xa4,
-		.lo	= 0xa0,
-		.size	= 0,
-	},
-};
-
-/* Only need to use 1 window (per hose) to get access to all of system memory */
-void mv64x60_config_pci_windows(u8 *bridge_base, u8 *bridge_pbase, u8 hose,
-		u8 bus, u32 mem_size, u32 acc_bits)
-{
-	u32 i, offset, bar_enable, enables;
-
-	/* Disable all windows but PCI MEM -> Bridge's regs window */
-	enables = ~(1 << 9);
-	bar_enable = hose ? MV64x60_PCI1_BAR_ENABLE : MV64x60_PCI0_BAR_ENABLE;
-	out_le32((u32 *)(bridge_base + bar_enable), enables);
-
-	for (i=0; i<MV64x60_PCI_ACC_CNTL_WINDOWS; i++)
-		out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][i].lo), 0);
-
-	/* If mem_size is 0, leave windows disabled */
-	if (mem_size == 0)
-		return;
-
-	/* Cause automatic updates of PCI remap regs */
-	offset = hose ?
-		MV64x60_PCI1_PCI_DECODE_CNTL : MV64x60_PCI0_PCI_DECODE_CNTL;
-	i = in_le32((u32 *)(bridge_base + offset));
-	out_le32((u32 *)(bridge_base + offset), i & ~0x1);
-
-	mem_size = (mem_size - 1) & 0xfffff000;
-
-	/* Map PCI MEM addr 0 -> System Mem addr 0 */
-	mv64x60_cfg_write(bridge_base, hose, bus,
-			PCI_DEVFN(0, mv64x60_pci2mem[hose].fcn),
-			mv64x60_pci2mem[hose].hi, 0);
-	mv64x60_cfg_write(bridge_base, hose, bus,
-			PCI_DEVFN(0, mv64x60_pci2mem[hose].fcn),
-			mv64x60_pci2mem[hose].lo, 0);
-	out_le32((u32 *)(bridge_base + mv64x60_pci2mem[hose].size),mem_size);
-
-	acc_bits |= MV64x60_PCI_ACC_CNTL_ENABLE;
-	out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][0].hi), 0);
-	out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][0].lo), acc_bits);
-	out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][0].size),mem_size);
-
-	/* Set PCI MEM->bridge's reg window to where they are in CPU mem map */
-	i = (u32)bridge_base;
-	i &= 0xffff0000;
-	i |= (0x2 << 1);
-	mv64x60_cfg_write(bridge_base, hose, bus, PCI_DEVFN(0,0),
-			mv64x60_pci2reg[hose].hi, 0);
-	mv64x60_cfg_write(bridge_base, hose, bus, PCI_DEVFN(0,0),
-			mv64x60_pci2reg[hose].lo, i);
-
-	enables &= ~0x1; /* Enable PCI MEM -> System Mem window 0 */
-	out_le32((u32 *)(bridge_base + bar_enable), enables);
-}
-
-/* CPU -> PCI I/O & MEM setup */
-struct mv64x60_cpu2pci_win mv64x60_cpu2pci_io[2] = {
-	{ /* hose 0 */
-		.lo		= MV64x60_CPU2PCI0_IO_BASE,
-		.size		= MV64x60_CPU2PCI0_IO_SIZE,
-		.remap_hi	= 0,
-		.remap_lo	= MV64x60_CPU2PCI0_IO_REMAP,
-	},
-	{ /* hose 1 */
-		.lo		= MV64x60_CPU2PCI1_IO_BASE,
-		.size		= MV64x60_CPU2PCI1_IO_SIZE,
-		.remap_hi	= 0,
-		.remap_lo	= MV64x60_CPU2PCI1_IO_REMAP,
-	},
-};
-
-struct mv64x60_cpu2pci_win mv64x60_cpu2pci_mem[2] = {
-	{ /* hose 0 */
-		.lo		= MV64x60_CPU2PCI0_MEM_0_BASE,
-		.size		= MV64x60_CPU2PCI0_MEM_0_SIZE,
-		.remap_hi	= MV64x60_CPU2PCI0_MEM_0_REMAP_HI,
-		.remap_lo	= MV64x60_CPU2PCI0_MEM_0_REMAP_LO,
-	},
-	{ /* hose 1 */
-		.lo		= MV64x60_CPU2PCI1_MEM_0_BASE,
-		.size		= MV64x60_CPU2PCI1_MEM_0_SIZE,
-		.remap_hi	= MV64x60_CPU2PCI1_MEM_0_REMAP_HI,
-		.remap_lo	= MV64x60_CPU2PCI1_MEM_0_REMAP_LO,
-	},
-};
-
-/* Only need to set up 1 window to pci mem space */
-void mv64x60_config_cpu2pci_window(u8 *bridge_base, u8 hose, u32 pci_base_hi,
-		u32 pci_base_lo, u32 cpu_base, u32 size,
-		struct mv64x60_cpu2pci_win *offset_tbl)
-{
-	cpu_base >>= 16;
-	cpu_base |= MV64x60_CPU2PCI_SWAP_NONE;
-	out_le32((u32 *)(bridge_base + offset_tbl[hose].lo), cpu_base);
-
-	if (offset_tbl[hose].remap_hi != 0)
-		out_le32((u32 *)(bridge_base + offset_tbl[hose].remap_hi),
-				pci_base_hi);
-	out_le32((u32 *)(bridge_base + offset_tbl[hose].remap_lo),
-			pci_base_lo >> 16);
-
-	size = (size - 1) >> 16;
-	out_le32((u32 *)(bridge_base + offset_tbl[hose].size), size);
-}
-
-/* Read mem ctlr to get the amount of mem in system */
-u32 mv64x60_get_mem_size(u8 *bridge_base)
-{
-	u32 enables, i, v;
-	u32 mem = 0;
-
-	enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE)) & 0xf;
-
-	for (i=0; i<MV64x60_CPU2MEM_WINDOWS; i++)
-		if (!(enables & (1<<i))) {
-			v = in_le32((u32*)(bridge_base
-						+ mv64x60_cpu2mem[i].size));
-			v = ((v & 0xffff) + 1) << 16;
-			mem += v;
-		}
-
-	return mem;
-}
-
-/* Get physical address of bridge's registers */
-u8 *mv64x60_get_bridge_pbase(void)
-{
-	u32 v[2];
-	void *devp;
-
-	devp = find_node_by_compatible(NULL, "marvell,mv64360");
-	if (devp == NULL)
-		goto err_out;
-	if (getprop(devp, "reg", v, sizeof(v)) != sizeof(v))
-		goto err_out;
-
-	return (u8 *)v[0];
-
-err_out:
-	return 0;
-}
-
-/* Get virtual address of bridge's registers */
-u8 *mv64x60_get_bridge_base(void)
-{
-	u32 v;
-	void *devp;
-
-	devp = find_node_by_compatible(NULL, "marvell,mv64360");
-	if (devp == NULL)
-		goto err_out;
-	if (getprop(devp, "virtual-reg", &v, sizeof(v)) != sizeof(v))
-		goto err_out;
-
-	return (u8 *)v;
-
-err_out:
-	return 0;
-}
-
-u8 mv64x60_is_coherent(void)
-{
-	u32 v;
-	void *devp;
-
-	devp = finddevice("/");
-	if (devp == NULL)
-		return 1; /* Assume coherency on */
-
-	if (getprop(devp, "coherency-off", &v, sizeof(v)) < 0)
-		return 1; /* Coherency on */
-	else
-		return 0;
-}
diff --git a/arch/powerpc/boot/mv64x60.h b/arch/powerpc/boot/mv64x60.h
deleted file mode 100644
index b827105e6e543..0000000000000
--- a/arch/powerpc/boot/mv64x60.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Author: Mark A. Greer <source@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#ifndef _PPC_BOOT_MV64x60_H_
-#define _PPC_BOOT_MV64x60_H_
-
-#define MV64x60_CPU_BAR_ENABLE			0x0278
-
-#define MV64x60_PCI_ACC_CNTL_ENABLE		(1<<0)
-#define MV64x60_PCI_ACC_CNTL_REQ64		(1<<1)
-#define MV64x60_PCI_ACC_CNTL_SNOOP_NONE		0x00000000
-#define MV64x60_PCI_ACC_CNTL_SNOOP_WT		0x00000004
-#define MV64x60_PCI_ACC_CNTL_SNOOP_WB		0x00000008
-#define MV64x60_PCI_ACC_CNTL_SNOOP_MASK		0x0000000c
-#define MV64x60_PCI_ACC_CNTL_ACCPROT		(1<<4)
-#define MV64x60_PCI_ACC_CNTL_WRPROT		(1<<5)
-#define MV64x60_PCI_ACC_CNTL_SWAP_BYTE		0x00000000
-#define MV64x60_PCI_ACC_CNTL_SWAP_NONE		0x00000040
-#define MV64x60_PCI_ACC_CNTL_SWAP_BYTE_WORD	0x00000080
-#define MV64x60_PCI_ACC_CNTL_SWAP_WORD		0x000000c0
-#define MV64x60_PCI_ACC_CNTL_SWAP_MASK		0x000000c0
-#define MV64x60_PCI_ACC_CNTL_MBURST_32_BYTES	0x00000000
-#define MV64x60_PCI_ACC_CNTL_MBURST_64_BYTES	0x00000100
-#define MV64x60_PCI_ACC_CNTL_MBURST_128_BYTES	0x00000200
-#define MV64x60_PCI_ACC_CNTL_MBURST_MASK	0x00000300
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_32_BYTES	0x00000000
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_64_BYTES	0x00000400
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_128_BYTES	0x00000800
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_256_BYTES	0x00000c00
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_MASK	0x00000c00
-
-struct mv64x60_cpu2pci_win {
-	u32 lo;
-	u32 size;
-	u32 remap_hi;
-	u32 remap_lo;
-};
-
-extern struct mv64x60_cpu2pci_win mv64x60_cpu2pci_io[2];
-extern struct mv64x60_cpu2pci_win mv64x60_cpu2pci_mem[2];
-
-u32 mv64x60_cfg_read(u8 *bridge_base, u8 hose, u8 bus, u8 devfn,
-		u8 offset);
-void mv64x60_cfg_write(u8 *bridge_base, u8 hose, u8 bus, u8 devfn,
-		u8 offset, u32 val);
-
-void mv64x60_config_ctlr_windows(u8 *bridge_base, u8 *bridge_pbase,
-		u8 is_coherent);
-void mv64x60_config_pci_windows(u8 *bridge_base, u8 *bridge_pbase, u8 hose,
-		u8 bus, u32 mem_size, u32 acc_bits);
-void mv64x60_config_cpu2pci_window(u8 *bridge_base, u8 hose, u32 pci_base_hi,
-		u32 pci_base_lo, u32 cpu_base, u32 size,
-		struct mv64x60_cpu2pci_win *offset_tbl);
-u32 mv64x60_get_mem_size(u8 *bridge_base);
-u8 *mv64x60_get_bridge_pbase(void);
-u8 *mv64x60_get_bridge_base(void);
-u8 mv64x60_is_coherent(void);
-
-int mv64x60_i2c_open(void);
-int mv64x60_i2c_read(u32 devaddr, u8 *buf, u32 offset, u32 offset_size,
-		u32 count);
-void mv64x60_i2c_close(void);
-
-#endif /* _PPC_BOOT_MV64x60_H_ */

From 04debf21fa1741e7d752c37550f7e4816e4067c4 Mon Sep 17 00:00:00 2001
From: Mark Greer <mgreer@animalcreek.com>
Date: Thu, 5 Apr 2018 18:17:20 -0700
Subject: [PATCH 243/251] powerpc: Remove core support for Marvell mv64x60
 hostbridges

There are no longer any platforms that use Marvell's mv64x60
hostbridges so remove the supporting kernel code.

CC: Dale Farnsworth <dale@farnsworth.org>
Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/devicetree/bindings/marvell.txt | 516 -----------------
 arch/powerpc/sysdev/Makefile                  |   3 -
 arch/powerpc/sysdev/mv64x60.h                 |  13 -
 arch/powerpc/sysdev/mv64x60_dev.c             | 535 ------------------
 arch/powerpc/sysdev/mv64x60_pci.c             | 171 ------
 arch/powerpc/sysdev/mv64x60_pic.c             | 297 ----------
 arch/powerpc/sysdev/mv64x60_udbg.c            | 152 -----
 7 files changed, 1687 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/marvell.txt
 delete mode 100644 arch/powerpc/sysdev/mv64x60.h
 delete mode 100644 arch/powerpc/sysdev/mv64x60_dev.c
 delete mode 100644 arch/powerpc/sysdev/mv64x60_pci.c
 delete mode 100644 arch/powerpc/sysdev/mv64x60_pic.c
 delete mode 100644 arch/powerpc/sysdev/mv64x60_udbg.c

diff --git a/Documentation/devicetree/bindings/marvell.txt b/Documentation/devicetree/bindings/marvell.txt
deleted file mode 100644
index 7f722316458a3..0000000000000
--- a/Documentation/devicetree/bindings/marvell.txt
+++ /dev/null
@@ -1,516 +0,0 @@
-Marvell Discovery mv64[345]6x System Controller chips
-===========================================================
-
-The Marvell mv64[345]60 series of system controller chips contain
-many of the peripherals needed to implement a complete computer
-system.  In this section, we define device tree nodes to describe
-the system controller chip itself and each of the peripherals
-which it contains.  Compatible string values for each node are
-prefixed with the string "marvell,", for Marvell Technology Group Ltd.
-
-1) The /system-controller node
-
-  This node is used to represent the system-controller and must be
-  present when the system uses a system controller chip. The top-level
-  system-controller node contains information that is global to all
-  devices within the system controller chip. The node name begins
-  with "system-controller" followed by the unit address, which is
-  the base address of the memory-mapped register set for the system
-  controller chip.
-
-  Required properties:
-
-    - ranges : Describes the translation of system controller addresses
-      for memory mapped registers.
-    - clock-frequency: Contains the main clock frequency for the system
-      controller chip.
-    - reg : This property defines the address and size of the
-      memory-mapped registers contained within the system controller
-      chip.  The address specified in the "reg" property should match
-      the unit address of the system-controller node.
-    - #address-cells : Address representation for system controller
-      devices.  This field represents the number of cells needed to
-      represent the address of the memory-mapped registers of devices
-      within the system controller chip.
-    - #size-cells : Size representation for the memory-mapped
-      registers within the system controller chip.
-    - #interrupt-cells : Defines the width of cells used to represent
-      interrupts.
-
-  Optional properties:
-
-    - model : The specific model of the system controller chip.  Such
-      as, "mv64360", "mv64460", or "mv64560".
-    - compatible : A string identifying the compatibility identifiers
-      of the system controller chip.
-
-  The system-controller node contains child nodes for each system
-  controller device that the platform uses.  Nodes should not be created
-  for devices which exist on the system controller chip but are not used
-
-  Example Marvell Discovery mv64360 system-controller node:
-
-    system-controller@f1000000 { /* Marvell Discovery mv64360 */
-	    #address-cells = <1>;
-	    #size-cells = <1>;
-	    model = "mv64360";                      /* Default */
-	    compatible = "marvell,mv64360";
-	    clock-frequency = <133333333>;
-	    reg = <0xf1000000 0x10000>;
-	    virtual-reg = <0xf1000000>;
-	    ranges = <0x88000000 0x88000000 0x1000000 /* PCI 0 I/O Space */
-		    0x80000000 0x80000000 0x8000000 /* PCI 0 MEM Space */
-		    0xa0000000 0xa0000000 0x4000000 /* User FLASH */
-		    0x00000000 0xf1000000 0x0010000 /* Bridge's regs */
-		    0xf2000000 0xf2000000 0x0040000>;/* Integrated SRAM */
-
-	    [ child node definitions... ]
-    }
-
-2) Child nodes of /system-controller
-
-   a) Marvell Discovery MDIO bus
-
-   The MDIO is a bus to which the PHY devices are connected.  For each
-   device that exists on this bus, a child node should be created.  See
-   the definition of the PHY node below for an example of how to define
-   a PHY.
-
-   Required properties:
-     - #address-cells : Should be <1>
-     - #size-cells : Should be <0>
-     - compatible : Should be "marvell,mv64360-mdio"
-
-   Example:
-
-     mdio {
-	     #address-cells = <1>;
-	     #size-cells = <0>;
-	     compatible = "marvell,mv64360-mdio";
-
-	     ethernet-phy@0 {
-		     ......
-	     };
-     };
-
-
-   b) Marvell Discovery ethernet controller
-
-   The Discover ethernet controller is described with two levels
-   of nodes.  The first level describes an ethernet silicon block
-   and the second level describes up to 3 ethernet nodes within
-   that block.  The reason for the multiple levels is that the
-   registers for the node are interleaved within a single set
-   of registers.  The "ethernet-block" level describes the
-   shared register set, and the "ethernet" nodes describe ethernet
-   port-specific properties.
-
-   Ethernet block node
-
-   Required properties:
-     - #address-cells : <1>
-     - #size-cells : <0>
-     - compatible : "marvell,mv64360-eth-block"
-     - reg : Offset and length of the register set for this block
-
-   Optional properties:
-     - clocks : Phandle to the clock control device and gate bit
-
-   Example Discovery Ethernet block node:
-     ethernet-block@2000 {
-	     #address-cells = <1>;
-	     #size-cells = <0>;
-	     compatible = "marvell,mv64360-eth-block";
-	     reg = <0x2000 0x2000>;
-	     ethernet@0 {
-		     .......
-	     };
-     };
-
-   Ethernet port node
-
-   Required properties:
-     - compatible : Should be "marvell,mv64360-eth".
-     - reg : Should be <0>, <1>, or <2>, according to which registers
-       within the silicon block the device uses.
-     - interrupts : <a> where a is the interrupt number for the port.
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-     - phy : the phandle for the PHY connected to this ethernet
-       controller.
-     - local-mac-address : 6 bytes, MAC address
-
-   Example Discovery Ethernet port node:
-     ethernet@0 {
-	     compatible = "marvell,mv64360-eth";
-	     reg = <0>;
-	     interrupts = <32>;
-	     interrupt-parent = <&PIC>;
-	     phy = <&PHY0>;
-	     local-mac-address = [ 00 00 00 00 00 00 ];
-     };
-
-
-
-   c) Marvell Discovery PHY nodes
-
-   Required properties:
-     - interrupts : <a> where a is the interrupt number for this phy.
-     - interrupt-parent : the phandle for the interrupt controller that
-       services interrupts for this device.
-     - reg : The ID number for the phy, usually a small integer
-
-   Example Discovery PHY node:
-     ethernet-phy@1 {
-	     compatible = "broadcom,bcm5421";
-	     interrupts = <76>;      /* GPP 12 */
-	     interrupt-parent = <&PIC>;
-	     reg = <1>;
-     };
-
-
-   d) Marvell Discovery SDMA nodes
-
-   Represent DMA hardware associated with the MPSC (multiprotocol
-   serial controllers).
-
-   Required properties:
-     - compatible : "marvell,mv64360-sdma"
-     - reg : Offset and length of the register set for this device
-     - interrupts : <a> where a is the interrupt number for the DMA
-       device.
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery SDMA node:
-     sdma@4000 {
-	     compatible = "marvell,mv64360-sdma";
-	     reg = <0x4000 0xc18>;
-	     virtual-reg = <0xf1004000>;
-	     interrupts = <36>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
-   e) Marvell Discovery BRG nodes
-
-   Represent baud rate generator hardware associated with the MPSC
-   (multiprotocol serial controllers).
-
-   Required properties:
-     - compatible : "marvell,mv64360-brg"
-     - reg : Offset and length of the register set for this device
-     - clock-src : A value from 0 to 15 which selects the clock
-       source for the baud rate generator.  This value corresponds
-       to the CLKS value in the BRGx configuration register.  See
-       the mv64x60 User's Manual.
-     - clock-frequence : The frequency (in Hz) of the baud rate
-       generator's input clock.
-     - current-speed : The current speed setting (presumably by
-       firmware) of the baud rate generator.
-
-   Example Discovery BRG node:
-     brg@b200 {
-	     compatible = "marvell,mv64360-brg";
-	     reg = <0xb200 0x8>;
-	     clock-src = <8>;
-	     clock-frequency = <133333333>;
-	     current-speed = <9600>;
-     };
-
-
-   f) Marvell Discovery CUNIT nodes
-
-   Represent the Serial Communications Unit device hardware.
-
-   Required properties:
-     - reg : Offset and length of the register set for this device
-
-   Example Discovery CUNIT node:
-     cunit@f200 {
-	     reg = <0xf200 0x200>;
-     };
-
-
-   g) Marvell Discovery MPSCROUTING nodes
-
-   Represent the Discovery's MPSC routing hardware
-
-   Required properties:
-     - reg : Offset and length of the register set for this device
-
-   Example Discovery CUNIT node:
-     mpscrouting@b500 {
-	     reg = <0xb400 0xc>;
-     };
-
-
-   h) Marvell Discovery MPSCINTR nodes
-
-   Represent the Discovery's MPSC DMA interrupt hardware registers
-   (SDMA cause and mask registers).
-
-   Required properties:
-     - reg : Offset and length of the register set for this device
-
-   Example Discovery MPSCINTR node:
-     mpsintr@b800 {
-	     reg = <0xb800 0x100>;
-     };
-
-
-   i) Marvell Discovery MPSC nodes
-
-   Represent the Discovery's MPSC (Multiprotocol Serial Controller)
-   serial port.
-
-   Required properties:
-     - compatible : "marvell,mv64360-mpsc"
-     - reg : Offset and length of the register set for this device
-     - sdma : the phandle for the SDMA node used by this port
-     - brg : the phandle for the BRG node used by this port
-     - cunit : the phandle for the CUNIT node used by this port
-     - mpscrouting : the phandle for the MPSCROUTING node used by this port
-     - mpscintr : the phandle for the MPSCINTR node used by this port
-     - cell-index : the hardware index of this cell in the MPSC core
-     - max_idle : value needed for MPSC CHR3 (Maximum Frame Length)
-       register
-     - interrupts : <a> where a is the interrupt number for the MPSC.
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery MPSCINTR node:
-     mpsc@8000 {
-	     compatible = "marvell,mv64360-mpsc";
-	     reg = <0x8000 0x38>;
-	     virtual-reg = <0xf1008000>;
-	     sdma = <&SDMA0>;
-	     brg = <&BRG0>;
-	     cunit = <&CUNIT>;
-	     mpscrouting = <&MPSCROUTING>;
-	     mpscintr = <&MPSCINTR>;
-	     cell-index = <0>;
-	     max_idle = <40>;
-	     interrupts = <40>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
-   j) Marvell Discovery Watch Dog Timer nodes
-
-   Represent the Discovery's watchdog timer hardware
-
-   Required properties:
-     - compatible : "marvell,mv64360-wdt"
-     - reg : Offset and length of the register set for this device
-
-   Example Discovery Watch Dog Timer node:
-     wdt@b410 {
-	     compatible = "marvell,mv64360-wdt";
-	     reg = <0xb410 0x8>;
-     };
-
-
-   k) Marvell Discovery I2C nodes
-
-   Represent the Discovery's I2C hardware
-
-   Required properties:
-     - device_type : "i2c"
-     - compatible : "marvell,mv64360-i2c"
-     - reg : Offset and length of the register set for this device
-     - interrupts : <a> where a is the interrupt number for the I2C.
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery I2C node:
-	     compatible = "marvell,mv64360-i2c";
-	     reg = <0xc000 0x20>;
-	     virtual-reg = <0xf100c000>;
-	     interrupts = <37>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
-   l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes
-
-   Represent the Discovery's PIC hardware
-
-   Required properties:
-     - #interrupt-cells : <1>
-     - #address-cells : <0>
-     - compatible : "marvell,mv64360-pic"
-     - reg : Offset and length of the register set for this device
-     - interrupt-controller
-
-   Example Discovery PIC node:
-     pic {
-	     #interrupt-cells = <1>;
-	     #address-cells = <0>;
-	     compatible = "marvell,mv64360-pic";
-	     reg = <0x0 0x88>;
-	     interrupt-controller;
-     };
-
-
-   m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes
-
-   Represent the Discovery's MPP hardware
-
-   Required properties:
-     - compatible : "marvell,mv64360-mpp"
-     - reg : Offset and length of the register set for this device
-
-   Example Discovery MPP node:
-     mpp@f000 {
-	     compatible = "marvell,mv64360-mpp";
-	     reg = <0xf000 0x10>;
-     };
-
-
-   n) Marvell Discovery GPP (General Purpose Pins) nodes
-
-   Represent the Discovery's GPP hardware
-
-   Required properties:
-     - compatible : "marvell,mv64360-gpp"
-     - reg : Offset and length of the register set for this device
-
-   Example Discovery GPP node:
-     gpp@f000 {
-	     compatible = "marvell,mv64360-gpp";
-	     reg = <0xf100 0x20>;
-     };
-
-
-   o) Marvell Discovery PCI host bridge node
-
-   Represents the Discovery's PCI host bridge device.  The properties
-   for this node conform to Rev 2.1 of the PCI Bus Binding to IEEE
-   1275-1994.  A typical value for the compatible property is
-   "marvell,mv64360-pci".
-
-   Example Discovery PCI host bridge node
-     pci@80000000 {
-	     #address-cells = <3>;
-	     #size-cells = <2>;
-	     #interrupt-cells = <1>;
-	     device_type = "pci";
-	     compatible = "marvell,mv64360-pci";
-	     reg = <0xcf8 0x8>;
-	     ranges = <0x01000000 0x0        0x0
-			     0x88000000 0x0 0x01000000
-		       0x02000000 0x0 0x80000000
-			     0x80000000 0x0 0x08000000>;
-	     bus-range = <0 255>;
-	     clock-frequency = <66000000>;
-	     interrupt-parent = <&PIC>;
-	     interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
-	     interrupt-map = <
-		     /* IDSEL 0x0a */
-		     0x5000 0 0 1 &PIC 80
-		     0x5000 0 0 2 &PIC 81
-		     0x5000 0 0 3 &PIC 91
-		     0x5000 0 0 4 &PIC 93
-
-		     /* IDSEL 0x0b */
-		     0x5800 0 0 1 &PIC 91
-		     0x5800 0 0 2 &PIC 93
-		     0x5800 0 0 3 &PIC 80
-		     0x5800 0 0 4 &PIC 81
-
-		     /* IDSEL 0x0c */
-		     0x6000 0 0 1 &PIC 91
-		     0x6000 0 0 2 &PIC 93
-		     0x6000 0 0 3 &PIC 80
-		     0x6000 0 0 4 &PIC 81
-
-		     /* IDSEL 0x0d */
-		     0x6800 0 0 1 &PIC 93
-		     0x6800 0 0 2 &PIC 80
-		     0x6800 0 0 3 &PIC 81
-		     0x6800 0 0 4 &PIC 91
-	     >;
-     };
-
-
-   p) Marvell Discovery CPU Error nodes
-
-   Represent the Discovery's CPU error handler device.
-
-   Required properties:
-     - compatible : "marvell,mv64360-cpu-error"
-     - reg : Offset and length of the register set for this device
-     - interrupts : the interrupt number for this device
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery CPU Error node:
-     cpu-error@70 {
-	     compatible = "marvell,mv64360-cpu-error";
-	     reg = <0x70 0x10 0x128 0x28>;
-	     interrupts = <3>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
-   q) Marvell Discovery SRAM Controller nodes
-
-   Represent the Discovery's SRAM controller device.
-
-   Required properties:
-     - compatible : "marvell,mv64360-sram-ctrl"
-     - reg : Offset and length of the register set for this device
-     - interrupts : the interrupt number for this device
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery SRAM Controller node:
-     sram-ctrl@380 {
-	     compatible = "marvell,mv64360-sram-ctrl";
-	     reg = <0x380 0x80>;
-	     interrupts = <13>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
-   r) Marvell Discovery PCI Error Handler nodes
-
-   Represent the Discovery's PCI error handler device.
-
-   Required properties:
-     - compatible : "marvell,mv64360-pci-error"
-     - reg : Offset and length of the register set for this device
-     - interrupts : the interrupt number for this device
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery PCI Error Handler node:
-     pci-error@1d40 {
-	     compatible = "marvell,mv64360-pci-error";
-	     reg = <0x1d40 0x40 0xc28 0x4>;
-	     interrupts = <12>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
-   s) Marvell Discovery Memory Controller nodes
-
-   Represent the Discovery's memory controller device.
-
-   Required properties:
-     - compatible : "marvell,mv64360-mem-ctrl"
-     - reg : Offset and length of the register set for this device
-     - interrupts : the interrupt number for this device
-     - interrupt-parent : the phandle for the interrupt controller
-       that services interrupts for this device.
-
-   Example Discovery Memory Controller node:
-     mem-ctrl@1400 {
-	     compatible = "marvell,mv64360-mem-ctrl";
-	     reg = <0x1400 0x60>;
-	     interrupts = <17>;
-	     interrupt-parent = <&PIC>;
-     };
-
-
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 9861407d644aa..ea2f595b5133d 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -28,9 +28,6 @@ obj-$(CONFIG_FSL_85XX_CACHE_SRAM)	+= fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o
 obj-$(CONFIG_SIMPLE_GPIO)	+= simple_gpio.o
 obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
-mv64x60-$(CONFIG_PCI)		+= mv64x60_pci.o
-obj-$(CONFIG_MV64X60)		+= $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
-				   mv64x60_udbg.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc_cmos_setup.o
 
 obj-$(CONFIG_PPC_INDIRECT_PCI)	+= indirect_pci.o
diff --git a/arch/powerpc/sysdev/mv64x60.h b/arch/powerpc/sysdev/mv64x60.h
deleted file mode 100644
index 60cfcb90d1fa1..0000000000000
--- a/arch/powerpc/sysdev/mv64x60.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __MV64X60_H__
-#define __MV64X60_H__
-
-#include <linux/init.h>
-
-extern void __init mv64x60_init_irq(void);
-extern unsigned int mv64x60_get_irq(void);
-
-extern void __init mv64x60_pci_init(void);
-extern void __init mv64x60_init_early(void);
-
-#endif /* __MV64X60_H__ */
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
deleted file mode 100644
index 185a67e742a61..0000000000000
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ /dev/null
@@ -1,535 +0,0 @@
-/*
- * Platform device setup for Marvell mv64360/mv64460 host bridges (Discovery)
- *
- * Author: Dale Farnsworth <dale@farnsworth.org>
- *
- * 2007 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/mv643xx.h>
-#include <linux/platform_device.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/prom.h>
-
-/* These functions provide the necessary setup for the mv64x60 drivers. */
-
-static const struct of_device_id of_mv64x60_devices[] __initconst = {
-	{ .compatible = "marvell,mv64306-devctrl", },
-	{}
-};
-
-/*
- * Create MPSC platform devices
- */
-static int __init mv64x60_mpsc_register_shared_pdev(struct device_node *np)
-{
-	struct platform_device *pdev;
-	struct resource r[2];
-	struct mpsc_shared_pdata pdata;
-	const phandle *ph;
-	struct device_node *mpscrouting, *mpscintr;
-	int err;
-
-	ph = of_get_property(np, "mpscrouting", NULL);
-	mpscrouting = of_find_node_by_phandle(*ph);
-	if (!mpscrouting)
-		return -ENODEV;
-
-	err = of_address_to_resource(mpscrouting, 0, &r[0]);
-	of_node_put(mpscrouting);
-	if (err)
-		return err;
-
-	ph = of_get_property(np, "mpscintr", NULL);
-	mpscintr = of_find_node_by_phandle(*ph);
-	if (!mpscintr)
-		return -ENODEV;
-
-	err = of_address_to_resource(mpscintr, 0, &r[1]);
-	of_node_put(mpscintr);
-	if (err)
-		return err;
-
-	memset(&pdata, 0, sizeof(pdata));
-
-	pdev = platform_device_alloc(MPSC_SHARED_NAME, 0);
-	if (!pdev)
-		return -ENOMEM;
-
-	err = platform_device_add_resources(pdev, r, 2);
-	if (err)
-		goto error;
-
-	err = platform_device_add_data(pdev, &pdata, sizeof(pdata));
-	if (err)
-		goto error;
-
-	err = platform_device_add(pdev);
-	if (err)
-		goto error;
-
-	return 0;
-
-error:
-	platform_device_put(pdev);
-	return err;
-}
-
-
-static int __init mv64x60_mpsc_device_setup(struct device_node *np, int id)
-{
-	struct resource r[5];
-	struct mpsc_pdata pdata;
-	struct platform_device *pdev;
-	const unsigned int *prop;
-	const phandle *ph;
-	struct device_node *sdma, *brg;
-	int err;
-	int port_number;
-
-	/* only register the shared platform device the first time through */
-	if (id == 0 && (err = mv64x60_mpsc_register_shared_pdev(np)))
-		return err;
-
-	memset(r, 0, sizeof(r));
-
-	err = of_address_to_resource(np, 0, &r[0]);
-	if (err)
-		return err;
-
-	of_irq_to_resource(np, 0, &r[4]);
-
-	ph = of_get_property(np, "sdma", NULL);
-	sdma = of_find_node_by_phandle(*ph);
-	if (!sdma)
-		return -ENODEV;
-
-	of_irq_to_resource(sdma, 0, &r[3]);
-	err = of_address_to_resource(sdma, 0, &r[1]);
-	of_node_put(sdma);
-	if (err)
-		return err;
-
-	ph = of_get_property(np, "brg", NULL);
-	brg = of_find_node_by_phandle(*ph);
-	if (!brg)
-		return -ENODEV;
-
-	err = of_address_to_resource(brg, 0, &r[2]);
-	of_node_put(brg);
-	if (err)
-		return err;
-
-	prop = of_get_property(np, "cell-index", NULL);
-	if (!prop)
-		return -ENODEV;
-	port_number = *(int *)prop;
-
-	memset(&pdata, 0, sizeof(pdata));
-
-	pdata.cache_mgmt = 1; /* All current revs need this set */
-
-	pdata.max_idle = 40; /* default */
-	prop = of_get_property(np, "max_idle", NULL);
-	if (prop)
-		pdata.max_idle = *prop;
-
-	prop = of_get_property(brg, "current-speed", NULL);
-	if (prop)
-		pdata.default_baud = *prop;
-
-	/* Default is 8 bits, no parity, no flow control */
-	pdata.default_bits = 8;
-	pdata.default_parity = 'n';
-	pdata.default_flow = 'n';
-
-	prop = of_get_property(np, "chr_1", NULL);
-	if (prop)
-		pdata.chr_1_val = *prop;
-
-	prop = of_get_property(np, "chr_2", NULL);
-	if (prop)
-		pdata.chr_2_val = *prop;
-
-	prop = of_get_property(np, "chr_10", NULL);
-	if (prop)
-		pdata.chr_10_val = *prop;
-
-	prop = of_get_property(np, "mpcr", NULL);
-	if (prop)
-		pdata.mpcr_val = *prop;
-
-	prop = of_get_property(brg, "bcr", NULL);
-	if (prop)
-		pdata.bcr_val = *prop;
-
-	pdata.brg_can_tune = 1; /* All current revs need this set */
-
-	prop = of_get_property(brg, "clock-src", NULL);
-	if (prop)
-		pdata.brg_clk_src = *prop;
-
-	prop = of_get_property(brg, "clock-frequency", NULL);
-	if (prop)
-		pdata.brg_clk_freq = *prop;
-
-	pdev = platform_device_alloc(MPSC_CTLR_NAME, port_number);
-	if (!pdev)
-		return -ENOMEM;
-	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-
-	err = platform_device_add_resources(pdev, r, 5);
-	if (err)
-		goto error;
-
-	err = platform_device_add_data(pdev, &pdata, sizeof(pdata));
-	if (err)
-		goto error;
-
-	err = platform_device_add(pdev);
-	if (err)
-		goto error;
-
-	return 0;
-
-error:
-	platform_device_put(pdev);
-	return err;
-}
-
-/*
- * Create mv64x60_eth platform devices
- */
-static struct platform_device * __init mv64x60_eth_register_shared_pdev(
-						struct device_node *np, int id)
-{
-	struct platform_device *pdev;
-	struct resource r[2];
-	int err;
-
-	err = of_address_to_resource(np, 0, &r[0]);
-	if (err)
-		return ERR_PTR(err);
-
-	/* register an orion mdio bus driver */
-	r[1].start = r[0].start + 0x4;
-	r[1].end = r[0].start + 0x84 - 1;
-	r[1].flags = IORESOURCE_MEM;
-
-	if (id == 0) {
-		pdev = platform_device_register_simple("orion-mdio", -1, &r[1], 1);
-		if (IS_ERR(pdev))
-			return pdev;
-	}
-
-	pdev = platform_device_register_simple(MV643XX_ETH_SHARED_NAME, id,
-					       &r[0], 1);
-
-	return pdev;
-}
-
-static int __init mv64x60_eth_device_setup(struct device_node *np, int id,
-					   struct platform_device *shared_pdev)
-{
-	struct resource r[1];
-	struct mv643xx_eth_platform_data pdata;
-	struct platform_device *pdev;
-	struct device_node *phy;
-	const u8 *mac_addr;
-	const int *prop;
-	const phandle *ph;
-	int err;
-
-	memset(r, 0, sizeof(r));
-	of_irq_to_resource(np, 0, &r[0]);
-
-	memset(&pdata, 0, sizeof(pdata));
-
-	pdata.shared = shared_pdev;
-
-	prop = of_get_property(np, "reg", NULL);
-	if (!prop)
-		return -ENODEV;
-	pdata.port_number = *prop;
-
-	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
-		memcpy(pdata.mac_addr, mac_addr, 6);
-
-	prop = of_get_property(np, "speed", NULL);
-	if (prop)
-		pdata.speed = *prop;
-
-	prop = of_get_property(np, "tx_queue_size", NULL);
-	if (prop)
-		pdata.tx_queue_size = *prop;
-
-	prop = of_get_property(np, "rx_queue_size", NULL);
-	if (prop)
-		pdata.rx_queue_size = *prop;
-
-	prop = of_get_property(np, "tx_sram_addr", NULL);
-	if (prop)
-		pdata.tx_sram_addr = *prop;
-
-	prop = of_get_property(np, "tx_sram_size", NULL);
-	if (prop)
-		pdata.tx_sram_size = *prop;
-
-	prop = of_get_property(np, "rx_sram_addr", NULL);
-	if (prop)
-		pdata.rx_sram_addr = *prop;
-
-	prop = of_get_property(np, "rx_sram_size", NULL);
-	if (prop)
-		pdata.rx_sram_size = *prop;
-
-	ph = of_get_property(np, "phy", NULL);
-	if (!ph)
-		return -ENODEV;
-
-	phy = of_find_node_by_phandle(*ph);
-	if (phy == NULL)
-		return -ENODEV;
-
-	prop = of_get_property(phy, "reg", NULL);
-	if (prop)
-		pdata.phy_addr = MV643XX_ETH_PHY_ADDR(*prop);
-
-	of_node_put(phy);
-
-	pdev = platform_device_alloc(MV643XX_ETH_NAME, id);
-	if (!pdev)
-		return -ENOMEM;
-
-	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-	err = platform_device_add_resources(pdev, r, 1);
-	if (err)
-		goto error;
-
-	err = platform_device_add_data(pdev, &pdata, sizeof(pdata));
-	if (err)
-		goto error;
-
-	err = platform_device_add(pdev);
-	if (err)
-		goto error;
-
-	return 0;
-
-error:
-	platform_device_put(pdev);
-	return err;
-}
-
-/*
- * Create mv64x60_i2c platform devices
- */
-static int __init mv64x60_i2c_device_setup(struct device_node *np, int id)
-{
-	struct resource r[2];
-	struct platform_device *pdev;
-	struct mv64xxx_i2c_pdata pdata;
-	const unsigned int *prop;
-	int err;
-
-	memset(r, 0, sizeof(r));
-
-	err = of_address_to_resource(np, 0, &r[0]);
-	if (err)
-		return err;
-
-	of_irq_to_resource(np, 0, &r[1]);
-
-	memset(&pdata, 0, sizeof(pdata));
-
-	pdata.freq_m = 8;	/* default */
-	prop = of_get_property(np, "freq_m", NULL);
-	if (prop)
-		pdata.freq_m = *prop;
-
-	pdata.freq_n = 3;	/* default */
-	prop = of_get_property(np, "freq_n", NULL);
-	if (prop)
-		pdata.freq_n = *prop;
-
-	pdata.timeout = 1000;				/* default: 1 second */
-
-	pdev = platform_device_alloc(MV64XXX_I2C_CTLR_NAME, id);
-	if (!pdev)
-		return -ENOMEM;
-
-	err = platform_device_add_resources(pdev, r, 2);
-	if (err)
-		goto error;
-
-	err = platform_device_add_data(pdev, &pdata, sizeof(pdata));
-	if (err)
-		goto error;
-
-	err = platform_device_add(pdev);
-	if (err)
-		goto error;
-
-	return 0;
-
-error:
-	platform_device_put(pdev);
-	return err;
-}
-
-/*
- * Create mv64x60_wdt platform devices
- */
-static int __init mv64x60_wdt_device_setup(struct device_node *np, int id)
-{
-	struct resource r;
-	struct platform_device *pdev;
-	struct mv64x60_wdt_pdata pdata;
-	const unsigned int *prop;
-	int err;
-
-	err = of_address_to_resource(np, 0, &r);
-	if (err)
-		return err;
-
-	memset(&pdata, 0, sizeof(pdata));
-
-	pdata.timeout = 10;			/* Default: 10 seconds */
-
-	np = of_get_parent(np);
-	if (!np)
-		return -ENODEV;
-
-	prop = of_get_property(np, "clock-frequency", NULL);
-	of_node_put(np);
-	if (!prop)
-		return -ENODEV;
-	pdata.bus_clk = *prop / 1000000; /* wdt driver wants freq in MHz */
-
-	pdev = platform_device_alloc(MV64x60_WDT_NAME, id);
-	if (!pdev)
-		return -ENOMEM;
-
-	err = platform_device_add_resources(pdev, &r, 1);
-	if (err)
-		goto error;
-
-	err = platform_device_add_data(pdev, &pdata, sizeof(pdata));
-	if (err)
-		goto error;
-
-	err = platform_device_add(pdev);
-	if (err)
-		goto error;
-
-	return 0;
-
-error:
-	platform_device_put(pdev);
-	return err;
-}
-
-static int __init mv64x60_device_setup(void)
-{
-	struct device_node *np, *np2;
-	struct platform_device *pdev;
-	int id, id2;
-	int err;
-
-	id = 0;
-	for_each_compatible_node(np, NULL, "marvell,mv64360-mpsc") {
-		err = mv64x60_mpsc_device_setup(np, id++);
-		if (err)
-			printk(KERN_ERR "Failed to initialize MV64x60 "
-					"serial device %pOF: error %d.\n",
-					np, err);
-	}
-
-	id = 0;
-	id2 = 0;
-	for_each_compatible_node(np, NULL, "marvell,mv64360-eth-group") {
-		pdev = mv64x60_eth_register_shared_pdev(np, id++);
-		if (IS_ERR(pdev)) {
-			err = PTR_ERR(pdev);
-			printk(KERN_ERR "Failed to initialize MV64x60 "
-					"network block %pOF: error %d.\n",
-					np, err);
-			continue;
-		}
-		for_each_child_of_node(np, np2) {
-			if (!of_device_is_compatible(np2,
-					"marvell,mv64360-eth"))
-				continue;
-			err = mv64x60_eth_device_setup(np2, id2++, pdev);
-			if (err)
-				printk(KERN_ERR "Failed to initialize "
-						"MV64x60 network device %pOF: "
-						"error %d.\n",
-						np2, err);
-		}
-	}
-
-	id = 0;
-	for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") {
-		err = mv64x60_i2c_device_setup(np, id++);
-		if (err)
-			printk(KERN_ERR "Failed to initialize MV64x60 I2C "
-					"bus %pOF: error %d.\n",
-					np, err);
-	}
-
-	/* support up to one watchdog timer */
-	np = of_find_compatible_node(np, NULL, "marvell,mv64360-wdt");
-	if (np) {
-		if ((err = mv64x60_wdt_device_setup(np, id)))
-			printk(KERN_ERR "Failed to initialize MV64x60 "
-					"Watchdog %pOF: error %d.\n",
-					np, err);
-		of_node_put(np);
-	}
-
-	/* Now add every node that is on the device bus */
-	for_each_compatible_node(np, NULL, "marvell,mv64360")
-		of_platform_bus_probe(np, of_mv64x60_devices, NULL);
-
-	return 0;
-}
-arch_initcall(mv64x60_device_setup);
-
-static int __init mv64x60_add_mpsc_console(void)
-{
-	struct device_node *np = NULL;
-	const char *prop;
-
-	prop = of_get_property(of_chosen, "linux,stdout-path", NULL);
-	if (prop == NULL)
-		goto not_mpsc;
-
-	np = of_find_node_by_path(prop);
-	if (!np)
-		goto not_mpsc;
-
-	if (!of_device_is_compatible(np, "marvell,mv64360-mpsc"))
-		goto not_mpsc;
-
-	prop = of_get_property(np, "cell-index", NULL);
-	if (!prop)
-		goto not_mpsc;
-
-	add_preferred_console("ttyMM", *(int *)prop, NULL);
-
-not_mpsc:
-	return 0;
-}
-console_initcall(mv64x60_add_mpsc_console);
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
deleted file mode 100644
index 1afcdb428e511..0000000000000
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * PCI bus setup for Marvell mv64360/mv64460 host bridges (Discovery)
- *
- * Author: Dale Farnsworth <dale@farnsworth.org>
- *
- * 2007 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/stat.h>
-#include <linux/pci.h>
-
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#define PCI_HEADER_TYPE_INVALID		0x7f	/* Invalid PCI header type */
-
-#ifdef CONFIG_SYSFS
-/* 32-bit hex or dec stringified number + '\n' */
-#define MV64X60_VAL_LEN_MAX		11
-#define MV64X60_PCICFG_CPCI_HOTSWAP	0x68
-
-static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj,
-				   struct bin_attribute *attr, char *buf,
-				   loff_t off, size_t count)
-{
-	struct pci_dev *phb;
-	u32 v;
-
-	if (off > 0)
-		return 0;
-	if (count < MV64X60_VAL_LEN_MAX)
-		return -EINVAL;
-
-	phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
-	if (!phb)
-		return -ENODEV;
-	pci_read_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, &v);
-	pci_dev_put(phb);
-
-	return sprintf(buf, "0x%08x\n", v);
-}
-
-static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
-				    struct bin_attribute *attr, char *buf,
-				    loff_t off, size_t count)
-{
-	struct pci_dev *phb;
-	u32 v;
-
-	if (off > 0)
-		return 0;
-	if (count <= 0)
-		return -EINVAL;
-
-	if (sscanf(buf, "%i", &v) != 1)
-		return -EINVAL;
-
-	phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
-	if (!phb)
-		return -ENODEV;
-	pci_write_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, v);
-	pci_dev_put(phb);
-
-	return count;
-}
-
-static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
-	.attr = {
-		.name = "hs_reg",
-		.mode = 0644,
-	},
-	.size  = MV64X60_VAL_LEN_MAX,
-	.read  = mv64x60_hs_reg_read,
-	.write = mv64x60_hs_reg_write,
-};
-
-static int __init mv64x60_sysfs_init(void)
-{
-	struct device_node *np;
-	struct platform_device *pdev;
-	const unsigned int *prop;
-
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360");
-	if (!np)
-		return 0;
-
-	prop = of_get_property(np, "hs_reg_valid", NULL);
-	of_node_put(np);
-
-	pdev = platform_device_register_simple("marvell,mv64360", 0, NULL, 0);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
-
-	return sysfs_create_bin_file(&pdev->dev.kobj, &mv64x60_hs_reg_attr);
-}
-
-subsys_initcall(mv64x60_sysfs_init);
-
-#endif /* CONFIG_SYSFS */
-
-static void mv64x60_pci_fixup_early(struct pci_dev *dev)
-{
-	/*
-	 * Set the host bridge hdr_type to an invalid value so that
-	 * pci_setup_device() will ignore the host bridge.
-	 */
-	dev->hdr_type = PCI_HEADER_TYPE_INVALID;
-}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64360,
-			mv64x60_pci_fixup_early);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64460,
-			mv64x60_pci_fixup_early);
-
-static int __init mv64x60_add_bridge(struct device_node *dev)
-{
-	int len;
-	struct pci_controller *hose;
-	struct resource rsrc;
-	const int *bus_range;
-	int primary;
-
-	memset(&rsrc, 0, sizeof(rsrc));
-
-	/* Fetch host bridge registers address */
-	if (of_address_to_resource(dev, 0, &rsrc)) {
-		printk(KERN_ERR "No PCI reg property in device tree\n");
-		return -ENODEV;
-	}
-
-	/* Get bus range if any */
-	bus_range = of_get_property(dev, "bus-range", &len);
-	if (bus_range == NULL || len < 2 * sizeof(int))
-		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
-		       " bus 0\n", dev);
-
-	hose = pcibios_alloc_controller(dev);
-	if (!hose)
-		return -ENOMEM;
-
-	hose->first_busno = bus_range ? bus_range[0] : 0;
-	hose->last_busno = bus_range ? bus_range[1] : 0xff;
-
-	setup_indirect_pci(hose, rsrc.start, rsrc.start + 4, 0);
-	hose->self_busno = hose->first_busno;
-
-	printk(KERN_INFO "Found MV64x60 PCI host bridge at 0x%016llx. "
-	       "Firmware bus number: %d->%d\n",
-	       (unsigned long long)rsrc.start, hose->first_busno,
-	       hose->last_busno);
-
-	/* Interpret the "ranges" property */
-	/* This also maps the I/O region and sets isa_io/mem_base */
-	primary = (hose->first_busno == 0);
-	pci_process_bridge_OF_ranges(hose, dev, primary);
-
-	return 0;
-}
-
-void __init mv64x60_pci_init(void)
-{
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "marvell,mv64360-pci")
-		mv64x60_add_bridge(np);
-}
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
deleted file mode 100644
index a79953deb4896..0000000000000
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Interrupt handling for Marvell mv64360/mv64460 host bridges (Discovery)
- *
- * Author: Dale Farnsworth <dale@farnsworth.org>
- *
- * 2007 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-
-#include <asm/byteorder.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/irq.h>
-
-#include "mv64x60.h"
-
-/* Interrupt Controller Interface Registers */
-#define MV64X60_IC_MAIN_CAUSE_LO	0x0004
-#define MV64X60_IC_MAIN_CAUSE_HI	0x000c
-#define MV64X60_IC_CPU0_INTR_MASK_LO	0x0014
-#define MV64X60_IC_CPU0_INTR_MASK_HI	0x001c
-#define MV64X60_IC_CPU0_SELECT_CAUSE	0x0024
-
-#define MV64X60_HIGH_GPP_GROUPS		0x0f000000
-#define MV64X60_SELECT_CAUSE_HIGH	0x40000000
-
-/* General Purpose Pins Controller Interface Registers */
-#define MV64x60_GPP_INTR_CAUSE		0x0008
-#define MV64x60_GPP_INTR_MASK		0x000c
-
-#define MV64x60_LEVEL1_LOW		0
-#define MV64x60_LEVEL1_HIGH		1
-#define MV64x60_LEVEL1_GPP		2
-
-#define MV64x60_LEVEL1_MASK		0x00000060
-#define MV64x60_LEVEL1_OFFSET		5
-
-#define MV64x60_LEVEL2_MASK		0x0000001f
-
-#define MV64x60_NUM_IRQS		96
-
-static DEFINE_SPINLOCK(mv64x60_lock);
-
-static void __iomem *mv64x60_irq_reg_base;
-static void __iomem *mv64x60_gpp_reg_base;
-
-/*
- * Interrupt Controller Handling
- *
- * The interrupt controller handles three groups of interrupts:
- *   main low:	IRQ0-IRQ31
- *   main high:	IRQ32-IRQ63
- *   gpp:	IRQ64-IRQ95
- *
- * This code handles interrupts in two levels.  Level 1 selects the
- * interrupt group, and level 2 selects an IRQ within that group.
- * Each group has its own irq_chip structure.
- */
-
-static u32 mv64x60_cached_low_mask;
-static u32 mv64x60_cached_high_mask = MV64X60_HIGH_GPP_GROUPS;
-static u32 mv64x60_cached_gpp_mask;
-
-static struct irq_domain *mv64x60_irq_host;
-
-/*
- * mv64x60_chip_low functions
- */
-
-static void mv64x60_mask_low(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_low_mask &= ~(1 << level2);
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_LO,
-		 mv64x60_cached_low_mask);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_LO);
-}
-
-static void mv64x60_unmask_low(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_low_mask |= 1 << level2;
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_LO,
-		 mv64x60_cached_low_mask);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_LO);
-}
-
-static struct irq_chip mv64x60_chip_low = {
-	.name		= "mv64x60_low",
-	.irq_mask	= mv64x60_mask_low,
-	.irq_mask_ack	= mv64x60_mask_low,
-	.irq_unmask	= mv64x60_unmask_low,
-};
-
-/*
- * mv64x60_chip_high functions
- */
-
-static void mv64x60_mask_high(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_high_mask &= ~(1 << level2);
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_HI,
-		 mv64x60_cached_high_mask);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_HI);
-}
-
-static void mv64x60_unmask_high(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_high_mask |= 1 << level2;
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_HI,
-		 mv64x60_cached_high_mask);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_HI);
-}
-
-static struct irq_chip mv64x60_chip_high = {
-	.name		= "mv64x60_high",
-	.irq_mask	= mv64x60_mask_high,
-	.irq_mask_ack	= mv64x60_mask_high,
-	.irq_unmask	= mv64x60_unmask_high,
-};
-
-/*
- * mv64x60_chip_gpp functions
- */
-
-static void mv64x60_mask_gpp(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_gpp_mask &= ~(1 << level2);
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_MASK,
-		 mv64x60_cached_gpp_mask);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_MASK);
-}
-
-static void mv64x60_mask_ack_gpp(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_gpp_mask &= ~(1 << level2);
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_MASK,
-		 mv64x60_cached_gpp_mask);
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_CAUSE,
-		 ~(1 << level2));
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_CAUSE);
-}
-
-static void mv64x60_unmask_gpp(struct irq_data *d)
-{
-	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	mv64x60_cached_gpp_mask |= 1 << level2;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_MASK,
-		 mv64x60_cached_gpp_mask);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-	(void)in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_MASK);
-}
-
-static struct irq_chip mv64x60_chip_gpp = {
-	.name		= "mv64x60_gpp",
-	.irq_mask	= mv64x60_mask_gpp,
-	.irq_mask_ack	= mv64x60_mask_ack_gpp,
-	.irq_unmask	= mv64x60_unmask_gpp,
-};
-
-/*
- * mv64x60_host_ops functions
- */
-
-static struct irq_chip *mv64x60_chips[] = {
-	[MV64x60_LEVEL1_LOW]  = &mv64x60_chip_low,
-	[MV64x60_LEVEL1_HIGH] = &mv64x60_chip_high,
-	[MV64x60_LEVEL1_GPP]  = &mv64x60_chip_gpp,
-};
-
-static int mv64x60_host_map(struct irq_domain *h, unsigned int virq,
-			  irq_hw_number_t hwirq)
-{
-	int level1;
-
-	irq_set_status_flags(virq, IRQ_LEVEL);
-
-	level1 = (hwirq & MV64x60_LEVEL1_MASK) >> MV64x60_LEVEL1_OFFSET;
-	BUG_ON(level1 > MV64x60_LEVEL1_GPP);
-	irq_set_chip_and_handler(virq, mv64x60_chips[level1],
-				 handle_level_irq);
-
-	return 0;
-}
-
-static const struct irq_domain_ops mv64x60_host_ops = {
-	.map   = mv64x60_host_map,
-};
-
-/*
- * Global functions
- */
-
-void __init mv64x60_init_irq(void)
-{
-	struct device_node *np;
-	phys_addr_t paddr;
-	unsigned int size;
-	const unsigned int *reg;
-	unsigned long flags;
-
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-gpp");
-	reg = of_get_property(np, "reg", &size);
-	paddr = of_translate_address(np, reg);
-	mv64x60_gpp_reg_base = ioremap(paddr, reg[1]);
-	of_node_put(np);
-
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-pic");
-	reg = of_get_property(np, "reg", &size);
-	paddr = of_translate_address(np, reg);
-	mv64x60_irq_reg_base = ioremap(paddr, reg[1]);
-
-	mv64x60_irq_host = irq_domain_add_linear(np, MV64x60_NUM_IRQS,
-					  &mv64x60_host_ops, NULL);
-
-	spin_lock_irqsave(&mv64x60_lock, flags);
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_MASK,
-		 mv64x60_cached_gpp_mask);
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_LO,
-		 mv64x60_cached_low_mask);
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_INTR_MASK_HI,
-		 mv64x60_cached_high_mask);
-
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_INTR_CAUSE, 0);
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_MAIN_CAUSE_LO, 0);
-	out_le32(mv64x60_irq_reg_base + MV64X60_IC_MAIN_CAUSE_HI, 0);
-	spin_unlock_irqrestore(&mv64x60_lock, flags);
-}
-
-unsigned int mv64x60_get_irq(void)
-{
-	u32 cause;
-	int level1;
-	irq_hw_number_t hwirq;
-	int virq = 0;
-
-	cause = in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_SELECT_CAUSE);
-	if (cause & MV64X60_SELECT_CAUSE_HIGH) {
-		cause &= mv64x60_cached_high_mask;
-		level1 = MV64x60_LEVEL1_HIGH;
-		if (cause & MV64X60_HIGH_GPP_GROUPS) {
-			cause = in_le32(mv64x60_gpp_reg_base +
-					MV64x60_GPP_INTR_CAUSE);
-			cause &= mv64x60_cached_gpp_mask;
-			level1 = MV64x60_LEVEL1_GPP;
-		}
-	} else {
-		cause &= mv64x60_cached_low_mask;
-		level1 = MV64x60_LEVEL1_LOW;
-	}
-	if (cause) {
-		hwirq = (level1 << MV64x60_LEVEL1_OFFSET) | __ilog2(cause);
-		virq = irq_linear_revmap(mv64x60_irq_host, hwirq);
-	}
-
-	return virq;
-}
diff --git a/arch/powerpc/sysdev/mv64x60_udbg.c b/arch/powerpc/sysdev/mv64x60_udbg.c
deleted file mode 100644
index 3b8734b870e9c..0000000000000
--- a/arch/powerpc/sysdev/mv64x60_udbg.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * udbg serial input/output routines for the Marvell MV64x60 (Discovery).
- *
- * Author: Dale Farnsworth <dale@farnsworth.org>
- *
- * 2007 (c) MontaVista Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-
-#include <sysdev/mv64x60.h>
-
-#define MPSC_0_CR1_OFFSET	0x000c
-
-#define MPSC_0_CR2_OFFSET	0x0010
-#define MPSC_CHR_2_TCS		(1 << 9)
-
-#define MPSC_0_CHR_10_OFFSET	0x0030
-
-#define MPSC_INTR_CAUSE_OFF_0	0x0004
-#define MPSC_INTR_CAUSE_OFF_1	0x000c
-#define MPSC_INTR_CAUSE_RCC	(1<<6)
-
-static void __iomem *mpsc_base;
-static void __iomem *mpsc_intr_cause;
-
-static void mv64x60_udbg_putc(char c)
-{
-	if (c == '\n')
-		mv64x60_udbg_putc('\r');
-
-	while(in_le32(mpsc_base + MPSC_0_CR2_OFFSET) & MPSC_CHR_2_TCS)
-		;
-	out_le32(mpsc_base + MPSC_0_CR1_OFFSET, c);
-	out_le32(mpsc_base + MPSC_0_CR2_OFFSET, MPSC_CHR_2_TCS);
-}
-
-static int mv64x60_udbg_testc(void)
-{
-	return (in_le32(mpsc_intr_cause) & MPSC_INTR_CAUSE_RCC) != 0;
-}
-
-static int mv64x60_udbg_getc(void)
-{
-	int cause = 0;
-	int c;
-
-	while (!mv64x60_udbg_testc())
-		;
-
-	c = in_8(mpsc_base + MPSC_0_CHR_10_OFFSET + 2);
-	out_8(mpsc_base + MPSC_0_CHR_10_OFFSET + 2, c);
-	out_le32(mpsc_intr_cause, cause & ~MPSC_INTR_CAUSE_RCC);
-	return c;
-}
-
-static int mv64x60_udbg_getc_poll(void)
-{
-	if (!mv64x60_udbg_testc())
-		return -1;
-
-	return mv64x60_udbg_getc();
-}
-
-static void mv64x60_udbg_init(void)
-{
-	struct device_node *np, *mpscintr, *stdout = NULL;
-	const char *path;
-	const phandle *ph;
-	struct resource r[2];
-	const int *block_index;
-	int intr_cause_offset;
-	int err;
-
-	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
-	if (!path)
-		return;
-
-	stdout = of_find_node_by_path(path);
-	if (!stdout)
-		return;
-
-	for_each_compatible_node(np, NULL, "marvell,mv64360-mpsc") {
-		if (np == stdout)
-			break;
-	}
-
-	of_node_put(stdout);
-	if (!np)
-		return;
-
-	block_index = of_get_property(np, "cell-index", NULL);
-	if (!block_index)
-		goto error;
-
-	switch (*block_index) {
-	case 0:
-		intr_cause_offset = MPSC_INTR_CAUSE_OFF_0;
-		break;
-	case 1:
-		intr_cause_offset = MPSC_INTR_CAUSE_OFF_1;
-		break;
-	default:
-		goto error;
-	}
-
-	err = of_address_to_resource(np, 0, &r[0]);
-	if (err)
-		goto error;
-
-	ph = of_get_property(np, "mpscintr", NULL);
-	mpscintr = of_find_node_by_phandle(*ph);
-	if (!mpscintr)
-		goto error;
-
-	err = of_address_to_resource(mpscintr, 0, &r[1]);
-	of_node_put(mpscintr);
-	if (err)
-		goto error;
-
-	of_node_put(np);
-
-	mpsc_base = ioremap(r[0].start, resource_size(&r[0]));
-	if (!mpsc_base)
-		return;
-
-	mpsc_intr_cause = ioremap(r[1].start, resource_size(&r[1]));
-	if (!mpsc_intr_cause) {
-		iounmap(mpsc_base);
-		return;
-	}
-	mpsc_intr_cause += intr_cause_offset;
-
-	udbg_putc = mv64x60_udbg_putc;
-	udbg_getc = mv64x60_udbg_getc;
-	udbg_getc_poll = mv64x60_udbg_getc_poll;
-
-	return;
-
-error:
-	of_node_put(np);
-}
-
-void mv64x60_init_early(void)
-{
-	mv64x60_udbg_init();
-}

From 75743649064ec0cf5ddd69f240ef23af66dde16e Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@us.ibm.com>
Date: Mon, 4 Jun 2018 18:33:38 +1000
Subject: [PATCH 244/251] powerpc/powernv: copy/paste - Mask SO bit in CR

NX can set the 3rd bit in CR register for XER[SO] (Summary overflow)
which is not related to paste request. The current paste function
returns failure for a successful request when this bit is set. So mask
this bit and check the proper return status.

Fixes: 2392c8c8c045 ("powerpc/powernv/vas: Define copy/paste interfaces")
Cc: stable@vger.kernel.org # v4.14+
Signed-off-by: Haren Myneni <haren@us.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/copy-paste.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
index 3fa62de96d9ca..cb36f9fbcef3e 100644
--- a/arch/powerpc/platforms/powernv/copy-paste.h
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -41,5 +41,6 @@ static inline int vas_paste(void *paste_address, int offset)
 		: "b" (offset), "b" (paste_address)
 		: "memory", "cr0");
 
-	return (cr >> CR0_SHIFT) & CR0_MASK;
+	/* We mask with 0xE to ignore SO */
+	return (cr >> CR0_SHIFT) & 0xE;
 }

From eabdb8ca8690eedd461e61ea7780595fbbae8132 Mon Sep 17 00:00:00 2001
From: Ram Pai <linuxram@us.ibm.com>
Date: Fri, 4 May 2018 13:01:51 -0700
Subject: [PATCH 245/251] powerpc/pkeys: Detach execute_only key on !PROT_EXEC

Disassociate the exec_key from a VMA if the VMA permission is not
PROT_EXEC anymore. Otherwise the exec_only key continues to be
associated with the vma, causing unexpected behavior.

The problem was reported on x86 by Shakeel Butt, which is also
applicable on powerpc.

Fixes: 5586cf61e108 ("powerpc: introduce execute-only pkey")
Cc: stable@vger.kernel.org # v4.16+
Reported-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pkeys.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 0eafdf01edc7d..e6f500fabf5ea 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -383,9 +383,9 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
 {
 	/*
 	 * If the currently associated pkey is execute-only, but the requested
-	 * protection requires read or write, move it back to the default pkey.
+	 * protection is not execute-only, move it back to the default pkey.
 	 */
-	if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE)))
+	if (vma_is_pkey_exec_only(vma) && (prot != PROT_EXEC))
 		return 0;
 
 	/*

From e6536a9f94c00a5e1da0a869ccea07e9a59665e4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 30 Mar 2018 16:55:53 +0100
Subject: [PATCH 246/251] powerpc: fix spelling mistake: "Usupported" ->
 "Unsupported"

Trivial fix to spelling mistake in bootx_printf message text

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powermac/bootx_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index ba0964c176208..3b3b0b9b35770 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -519,7 +519,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 			;
 	}
 	if (bi->architecture != BOOT_ARCH_PCI) {
-		bootx_printf(" !!! WARNING - Usupported machine"
+		bootx_printf(" !!! WARNING - Unsupported machine"
 			     " architecture !\n");
 		for (;;)
 			;

From b0c4acb1dd7c653f78391661079af544429b1f22 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 26 May 2018 16:15:31 +0100
Subject: [PATCH 247/251] powerpc-opal: fix spelling mistake "Uniterrupted" ->
 "Uninterrupted"

Trivial fix to spelling mistake in hmi_error_types text

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Stewart Smith <stewart@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-hmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 4efc95b4c7d41..586ec71a4e175 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -177,7 +177,7 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
 		"Processor recovery occurred for masked error",
 		"Timer facility experienced an error",
 		"TFMR SPR is corrupted",
-		"UPS (Uniterrupted Power System) Overflow indication",
+		"UPS (Uninterrupted Power System) Overflow indication",
 		"An XSCOM operation failure",
 		"An XSCOM operation completed",
 		"SCOM has set a reserved FIR bit to cause recovery",

From 2e5c93d6bb2f7bc17eb82748943a1b9f6b068520 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 5 Jun 2018 09:16:21 +0000
Subject: [PATCH 248/251] ocxl: Fix missing unlock on error in
 afu_ioctl_enable_p9_wait()

Add the missing unlock before return from function
afu_ioctl_enable_p9_wait() in the error handling case.

Fixes: e948e06fc63a ("ocxl: Expose the thread_id needed for wait on POWER9")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Alastair D'Silva <alastair@d-silva.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/ocxl/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index 33ae46ce0a8ae..e6a607488f8af 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -139,8 +139,10 @@ static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx,
 		// Locks both status & tidr
 		mutex_lock(&ctx->status_mutex);
 		if (!ctx->tidr) {
-			if (set_thread_tidr(current))
+			if (set_thread_tidr(current)) {
+				mutex_unlock(&ctx->status_mutex);
 				return -ENOENT;
+			}
 
 			ctx->tidr = current->thread.tidr;
 		}

From 415520373975d2eba565c256d2cad875ed4e9243 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 5 Jun 2018 06:57:43 +0000
Subject: [PATCH 249/251] powerpc: fix build failure by disabling
 attribute-alias warning in pci_32

Commit 2479bfc9bc600 ("powerpc: Fix build by disabling attribute-alias
warning for SYSCALL_DEFINEx") forgot arch/powerpc/kernel/pci_32.c

Latest GCC version emit the following warnings

As arch/powerpc code is built with -Werror, this breaks build with
GCC 8.1

This patch inhibits this warning

In file included from arch/powerpc/kernel/pci_32.c:14:
./include/linux/syscalls.h:233:18: error: 'sys_pciconfig_iobase' alias between functions of incompatible types 'long int(long int,  long unsigned int,  long unsigned int)' and 'long int(long int,  long int,  long int)' [-Werror=attribute-alias]
  asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
                  ^~~
./include/linux/syscalls.h:222:2: note: in expansion of macro '__SYSCALL_DEFINEx'
  __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
  ^~~~~~~~~~~~~~~~~

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/pci_32.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d63b488d34d79..4f861055a8521 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -285,6 +285,9 @@ pci_bus_to_hose(int bus)
  * Note that the returned IO or memory base is a physical address
  */
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wattribute-alias"
 SYSCALL_DEFINE3(pciconfig_iobase, long, which,
 		unsigned long, bus, unsigned long, devfn)
 {
@@ -310,3 +313,4 @@ SYSCALL_DEFINE3(pciconfig_iobase, long, which,
 
 	return result;
 }
+#pragma GCC diagnostic pop

From 0a4ec6aa035a52c422eceb2ed51ed88392a3d6c2 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Thu, 31 May 2018 17:45:09 +0530
Subject: [PATCH 250/251] cpuidle: powernv: Fix promotion from snooze if next
 state disabled

The commit 78eaa10f027c ("cpuidle: powernv/pseries: Auto-promotion of
snooze to deeper idle state") introduced a timeout for the snooze idle
state so that it could be eventually be promoted to a deeper idle
state. The snooze timeout value is static and set to the target
residency of the next idle state, which would train the cpuidle
governor to pick the next idle state eventually.

The unfortunate side-effect of this is that if the next idle state(s)
is disabled, the CPU will forever remain in snooze, despite the fact
that the system is completely idle, and other deeper idle states are
available.

This patch fixes the issue by dynamically setting the snooze timeout
to the target residency of the next enabled state on the device.

Before Patch:
  POWER8 : Only nap disabled.
  $ cpupower monitor sleep 30
  sleep took 30.01297 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | Nap  | Fast
     0|   8|   0| 96.41|  0.00|  0.00
     0|   8|   1| 96.43|  0.00|  0.00
     0|   8|   2| 96.47|  0.00|  0.00
     0|   8|   3| 96.35|  0.00|  0.00
     0|   8|   4| 96.37|  0.00|  0.00
     0|   8|   5| 96.37|  0.00|  0.00
     0|   8|   6| 96.47|  0.00|  0.00
     0|   8|   7| 96.47|  0.00|  0.00

  POWER9: Shallow states (stop0lite, stop1lite, stop2lite, stop0, stop1,
  stop2) disabled:
  $ cpupower monitor sleep 30
  sleep took 30.05033 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | stop | stop | stop | stop | stop | stop | stop | stop
     0|  16|   0| 89.79|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
     0|  16|   1| 90.12|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
     0|  16|   2| 90.21|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
     0|  16|   3| 90.29|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00

After Patch:
  POWER8 : Only nap disabled.
  $ cpupower monitor sleep 30
  sleep took 30.01200 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | Nap  | Fast
     0|   8|   0| 16.58|  0.00| 77.21
     0|   8|   1| 18.42|  0.00| 75.38
     0|   8|   2|  4.70|  0.00| 94.09
     0|   8|   3| 17.06|  0.00| 81.73
     0|   8|   4|  3.06|  0.00| 95.73
     0|   8|   5|  7.00|  0.00| 96.80
     0|   8|   6|  1.00|  0.00| 98.79
     0|   8|   7|  5.62|  0.00| 94.17

  POWER9: Shallow states (stop0lite, stop1lite, stop2lite, stop0, stop1,
  stop2) disabled:

  $ cpupower monitor sleep 30
  sleep took 30.02110 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | stop | stop | stop | stop | stop | stop | stop | stop
     0|   0|   0|  0.69|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  9.39| 89.70
     0|   0|   1|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.05| 93.21
     0|   0|   2|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00| 89.93
     0|   0|   3|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00| 93.26

Fixes: 78eaa10f027c ("cpuidle: powernv/pseries: Auto-promotion of snooze to deeper idle state")
Cc: stable@vger.kernel.org # v4.2+
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/cpuidle/cpuidle-powernv.c | 32 +++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 1a8234e706bc7..d29e4f041efe6 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -43,9 +43,31 @@ struct stop_psscr_table {
 
 static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly;
 
-static u64 snooze_timeout __read_mostly;
+static u64 default_snooze_timeout __read_mostly;
 static bool snooze_timeout_en __read_mostly;
 
+static u64 get_snooze_timeout(struct cpuidle_device *dev,
+			      struct cpuidle_driver *drv,
+			      int index)
+{
+	int i;
+
+	if (unlikely(!snooze_timeout_en))
+		return default_snooze_timeout;
+
+	for (i = index + 1; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+		struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+		if (s->disabled || su->disable)
+			continue;
+
+		return s->target_residency * tb_ticks_per_usec;
+	}
+
+	return default_snooze_timeout;
+}
+
 static int snooze_loop(struct cpuidle_device *dev,
 			struct cpuidle_driver *drv,
 			int index)
@@ -56,7 +78,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 
 	local_irq_enable();
 
-	snooze_exit_time = get_tb() + snooze_timeout;
+	snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
 	ppc64_runlatch_off();
 	HMT_very_low();
 	while (!need_resched()) {
@@ -465,11 +487,9 @@ static int powernv_idle_probe(void)
 		cpuidle_state_table = powernv_states;
 		/* Device tree can indicate more idle states */
 		max_idle_state = powernv_add_idle_states();
-		if (max_idle_state > 1) {
+		default_snooze_timeout = TICK_USEC * tb_ticks_per_usec;
+		if (max_idle_state > 1)
 			snooze_timeout_en = true;
-			snooze_timeout = powernv_states[1].target_residency *
-					 tb_ticks_per_usec;
-		}
  	} else
  		return -ENODEV;
 

From ff5bc793e47b537bf3e904fada585e102c54dd8b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 6 Jun 2018 11:40:08 +1000
Subject: [PATCH 251/251] powerpc/64s/radix: Fix missing ptesync in
 flush_cache_vmap

There is a typo in f1cb8f9beb ("powerpc/64s/radix: avoid ptesync after
set_pte and ptep_set_access_flags") config ifdef, which results in the
necessary ptesync not being issued after vmalloc.

This causes random kernel faults in module load, bpf load, anywhere
that vmalloc mappings are used.

After correcting the code, this survives a guest kernel booting
hundreds of times where previously there would be a crash every few
boots (I haven't noticed the crash on host, perhaps due to different
TLB and page table walking behaviour in hardware).

A memory clobber is also added to the flush, just to be sure it won't
be reordered with the pte set or the subsequent mapping access.

Fixes: f1cb8f9beb ("powerpc/64s/radix: avoid ptesync after set_pte and ptep_set_access_flags")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cacheflush.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e9662648e72d7..0d72ec75da631 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -23,10 +23,9 @@
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_icache_page(vma, page)		do { } while (0)
-#define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
-#ifdef CONFIG_BOOK3S_64
+#ifdef CONFIG_PPC_BOOK3S_64
 /*
  * Book3s has no ptesync after setting a pte, so without this ptesync it's
  * possible for a kernel virtual mapping access to return a spurious fault
@@ -34,7 +33,7 @@
  * not expect this type of fault. flush_cache_vmap is not exactly the right
  * place to put this, but it seems to work well enough.
  */
-#define flush_cache_vmap(start, end)		do { asm volatile("ptesync"); } while (0)
+#define flush_cache_vmap(start, end)		do { asm volatile("ptesync" ::: "memory"); } while (0)
 #else
 #define flush_cache_vmap(start, end)		do { } while (0)
 #endif