From 81cddd65b5c82758ea5571a25e31ff6f1f89ff02 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:45 +0100
Subject: [PATCH 01/13] arm64: traps: fix userspace cache maintenance emulation
 on a tagged pointer

When we emulate userspace cache maintenance in the kernel, we can
currently send the task a SIGSEGV even though the maintenance was done
on a valid address. This happens if the address has a non-zero address
tag, and happens to not be mapped in.

When we get the address from a user register, we don't currently remove
the address tag before performing cache maintenance on it. If the
maintenance faults, we end up in either __do_page_fault, where find_vma
can't find the VMA if the address has a tag, or in do_translation_fault,
where the tagged address will appear to be above TASK_SIZE. In both
cases, the address is not mapped in, and the task is sent a SIGSEGV.

This patch removes the tag from the address before using it. With this
patch, the fault is handled correctly, the address gets mapped in, and
the cache maintenance succeeds.

As a second bug, if cache maintenance (correctly) fails on an invalid
tagged address, the address gets passed into arm64_notify_segfault,
where find_vma fails to find the VMA due to the tag, and the wrong
si_code may be sent as part of the siginfo_t of the segfault. With this
patch, the correct si_code is sent.

Fixes: 7dd01aef0557 ("arm64: trap userspace "dc cvau" cache operation on errata-affected core")
Cc: <stable@vger.kernel.org> # 4.8.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/traps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index d4d6ae02cd558..0805b44f986a5 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused)
 }
 
 #define __user_cache_maint(insn, address, res)			\
-	if (untagged_addr(address) >= user_addr_max()) {	\
+	if (address >= user_addr_max()) {			\
 		res = -EFAULT;					\
 	} else {						\
 		uaccess_ttbr0_enable();				\
@@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 	int ret = 0;
 
-	address = pt_regs_read_reg(regs, rt);
+	address = untagged_addr(pt_regs_read_reg(regs, rt));
 
 	switch (crm) {
 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */

From 7dcd9dd8cebe9fa626af7e2358d03a37041a70fb Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:46 +0100
Subject: [PATCH 02/13] arm64: hw_breakpoint: fix watchpoint matching for
 tagged pointers

When we take a watchpoint exception, the address that triggered the
watchpoint is found in FAR_EL1. We compare it to the address of each
configured watchpoint to see which one was hit.

The configured watchpoint addresses are untagged, while the address in
FAR_EL1 will have an address tag if the data access was done using a
tagged address. The tag needs to be removed to compare the address to
the watchpoints.

Currently we don't remove it, and as a result can report the wrong
watchpoint as being hit (specifically, always either the highest TTBR0
watchpoint or lowest TTBR1 watchpoint). This patch removes the tag.

Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0")
Cc: <stable@vger.kernel.org> # 3.12.x-
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/uaccess.h  | 6 +++---
 arch/arm64/kernel/hw_breakpoint.c | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 5308d696311bc..0221029e27ff7 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -106,9 +106,9 @@ static inline void set_fs(mm_segment_t fs)
 })
 
 /*
- * When dealing with data aborts or instruction traps we may end up with
- * a tagged userland pointer. Clear the tag to get a sane pointer to pass
- * on to access_ok(), for instance.
+ * When dealing with data aborts, watchpoints, or instruction traps we may end
+ * up with a tagged userland pointer. Clear the tag to get a sane pointer to
+ * pass on to access_ok(), for instance.
  */
 #define untagged_addr(addr)		sign_extend64(addr, 55)
 
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0296e79242402..749f81779420c 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -36,6 +36,7 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
+#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
@@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 	u64 wp_low, wp_high;
 	u32 lens, lene;
 
+	addr = untagged_addr(addr);
+
 	lens = __ffs(ctrl->len);
 	lene = __fls(ctrl->len);
 

From 276e93279a630657fff4b086ba14c95955912dfa Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:47 +0100
Subject: [PATCH 03/13] arm64: entry: improve data abort handling of tagged
 pointers

When handling a data abort from EL0, we currently zero the top byte of
the faulting address, as we assume the address is a TTBR0 address, which
may contain a non-zero address tag. However, the address may be a TTBR1
address, in which case we should not zero the top byte. This patch fixes
that. The effect is that the full TTBR1 address is passed to the task's
signal handler (or printed out in the kernel log).

When handling a data abort from EL1, we leave the faulting address
intact, as we assume it's either a TTBR1 address or a TTBR0 address with
tag 0x00. This is true as far as I'm aware, we don't seem to access a
tagged TTBR0 address anywhere in the kernel. Regardless, it's easy to
forget about address tags, and code added in the future may not always
remember to remove tags from addresses before accessing them. So add tag
handling to the EL1 data abort handler as well. This also makes it
consistent with the EL0 data abort handler.

Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0")
Cc: <stable@vger.kernel.org> # 3.12.x-
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/asm-uaccess.h | 9 +++++++++
 arch/arm64/kernel/entry.S            | 5 +++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index df411f3e083ce..ecd9788cd2988 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -62,4 +62,13 @@ alternative_if ARM64_ALT_PAN_NOT_UAO
 alternative_else_nop_endif
 	.endm
 
+/*
+ * Remove the address tag from a virtual address, if present.
+ */
+	.macro	clear_address_tag, dst, addr
+	tst	\addr, #(1 << 55)
+	bic	\dst, \addr, #(0xff << 56)
+	csel	\dst, \dst, \addr, eq
+	.endm
+
 #endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df21..b738880350f9e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,12 +428,13 @@ el1_da:
 	/*
 	 * Data abort handling
 	 */
-	mrs	x0, far_el1
+	mrs	x3, far_el1
 	enable_dbg
 	// re-enable interrupts if they were enabled in the aborted context
 	tbnz	x23, #7, 1f			// PSR_I_BIT
 	enable_irq
 1:
+	clear_address_tag x0, x3
 	mov	x2, sp				// struct pt_regs
 	bl	do_mem_abort
 
@@ -594,7 +595,7 @@ el0_da:
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
 	ct_user_exit
-	bic	x0, x26, #(0xff << 56)
+	clear_address_tag x0, x26
 	mov	x1, x25
 	mov	x2, sp
 	bl	do_mem_abort

From f0e421b1bf7af97f026e1bb8bfe4c5a7a8c08f42 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:48 +0100
Subject: [PATCH 04/13] arm64: documentation: document tagged pointer stack
 constraints

Some kernel features don't currently work if a task puts a non-zero
address tag in its stack pointer, frame pointer, or frame record entries
(FP, LR).

For example, with a tagged stack pointer, the kernel can't deliver
signals to the process, and the task is killed instead. As another
example, with a tagged frame pointer or frame records, perf fails to
generate call graphs or resolve symbols.

For now, just document these limitations, instead of finding and fixing
everything that doesn't work, as it's not known if anyone needs to use
tags in these places anyway.

In addition, as requested by Dave Martin, generalize the limitations
into a general kernel address tag policy, and refactor
tagged-pointers.txt to include it.

Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0")
Cc: <stable@vger.kernel.org> # 3.12.x-
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/tagged-pointers.txt | 62 +++++++++++++++++++------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
index d9995f1f51b3e..a25a99e82bb1c 100644
--- a/Documentation/arm64/tagged-pointers.txt
+++ b/Documentation/arm64/tagged-pointers.txt
@@ -11,24 +11,56 @@ in AArch64 Linux.
 The kernel configures the translation tables so that translations made
 via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
 the virtual address ignored by the translation hardware. This frees up
-this byte for application use, with the following caveats:
+this byte for application use.
 
-	(1) The kernel requires that all user addresses passed to EL1
-	    are tagged with tag 0x00. This means that any syscall
-	    parameters containing user virtual addresses *must* have
-	    their top byte cleared before trapping to the kernel.
 
-	(2) Non-zero tags are not preserved when delivering signals.
-	    This means that signal handlers in applications making use
-	    of tags cannot rely on the tag information for user virtual
-	    addresses being maintained for fields inside siginfo_t.
-	    One exception to this rule is for signals raised in response
-	    to watchpoint debug exceptions, where the tag information
-	    will be preserved.
+Passing tagged addresses to the kernel
+--------------------------------------
 
-	(3) Special care should be taken when using tagged pointers,
-	    since it is likely that C compilers will not hazard two
-	    virtual addresses differing only in the upper byte.
+All interpretation of userspace memory addresses by the kernel assumes
+an address tag of 0x00.
+
+This includes, but is not limited to, addresses found in:
+
+ - pointer arguments to system calls, including pointers in structures
+   passed to system calls,
+
+ - the stack pointer (sp), e.g. when interpreting it to deliver a
+   signal,
+
+ - the frame pointer (x29) and frame records, e.g. when interpreting
+   them to generate a backtrace or call graph.
+
+Using non-zero address tags in any of these locations may result in an
+error code being returned, a (fatal) signal being raised, or other modes
+of failure.
+
+For these reasons, passing non-zero address tags to the kernel via
+system calls is forbidden, and using a non-zero address tag for sp is
+strongly discouraged.
+
+Programs maintaining a frame pointer and frame records that use non-zero
+address tags may suffer impaired or inaccurate debug and profiling
+visibility.
+
+
+Preserving tags
+---------------
+
+Non-zero tags are not preserved when delivering signals. This means that
+signal handlers in applications making use of tags cannot rely on the
+tag information for user virtual addresses being maintained for fields
+inside siginfo_t. One exception to this rule is for signals raised in
+response to watchpoint debug exceptions, where the tag information will
+be preserved.
 
 The architecture prevents the use of a tagged PC, so the upper byte will
 be set to a sign-extension of bit 55 on exception return.
+
+
+Other considerations
+--------------------
+
+Special care should be taken when using tagged pointers, since it is
+likely that C compilers will not hazard two virtual addresses differing
+only in the upper byte.

From fee960bed5e857eb126c4e56dd9ff85938356579 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:33 +0100
Subject: [PATCH 05/13] arm64: xchg: hazard against entire exchange variable

The inline assembly in __XCHG_CASE() uses a +Q constraint to hazard
against other accesses to the memory location being exchanged. However,
the pointer passed to the constraint is a u8 pointer, and thus the
hazard only applies to the first byte of the location.

GCC can take advantage of this, assuming that other portions of the
location are unchanged, as demonstrated with the following test case:

union u {
	unsigned long l;
	unsigned int i[2];
};

unsigned long update_char_hazard(union u *u)
{
	unsigned int a, b;

	a = u->i[1];
	asm ("str %1, %0" : "+Q" (*(char *)&u->l) : "r" (0UL));
	b = u->i[1];

	return a ^ b;
}

unsigned long update_long_hazard(union u *u)
{
	unsigned int a, b;

	a = u->i[1];
	asm ("str %1, %0" : "+Q" (*(long *)&u->l) : "r" (0UL));
	b = u->i[1];

	return a ^ b;
}

The linaro 15.08 GCC 5.1.1 toolchain compiles the above as follows when
using -O2 or above:

0000000000000000 <update_char_hazard>:
   0:	d2800001 	mov	x1, #0x0                   	// #0
   4:	f9000001 	str	x1, [x0]
   8:	d2800000 	mov	x0, #0x0                   	// #0
   c:	d65f03c0 	ret

0000000000000010 <update_long_hazard>:
  10:	b9400401 	ldr	w1, [x0,#4]
  14:	d2800002 	mov	x2, #0x0                   	// #0
  18:	f9000002 	str	x2, [x0]
  1c:	b9400400 	ldr	w0, [x0,#4]
  20:	4a000020 	eor	w0, w1, w0
  24:	d65f03c0 	ret

This patch fixes the issue by passing an unsigned long pointer into the
+Q constraint, as we do for our cmpxchg code. This may hazard against
more than is necessary, but this is better than missing a necessary
hazard.

Fixes: 305d454aaa29 ("arm64: atomics: implement native {relaxed, acquire, release} atomics")
Cc: <stable@vger.kernel.org> # 4.4.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cmpxchg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 91b26d26af8a5..ae852add053d8 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -46,7 +46,7 @@ static inline unsigned long __xchg_case_##name(unsigned long x,		\
 	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
 		__nops(3)						\
 	"	" #nop_lse)						\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr)	\
 	: "r" (x)							\
 	: cl);								\
 									\

From 994870bead4ab19087a79492400a5478e2906196 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:34 +0100
Subject: [PATCH 06/13] arm64: ensure extension of smp_store_release value

When an inline assembly operand's type is narrower than the register it
is allocated to, the least significant bits of the register (up to the
operand type's width) are valid, and any other bits are permitted to
contain any arbitrary value. This aligns with the AAPCS64 parameter
passing rules.

Our __smp_store_release() implementation does not account for this, and
implicitly assumes that operands have been zero-extended to the width of
the type being stored to. Thus, we may store unknown values to memory
when the value type is narrower than the pointer type (e.g. when storing
a char to a long).

This patch fixes the issue by casting the value operand to the same
width as the pointer operand in all cases, which ensures that the value
is zero-extended as we expect. We use the same union trickery as
__smp_load_acquire and {READ,WRITE}_ONCE() to avoid GCC complaining that
pointers are potentially cast to narrower width integers in unreachable
paths.

A whitespace issue at the top of __smp_store_release() is also
corrected.

No changes are necessary for __smp_load_acquire(). Load instructions
implicitly clear any upper bits of the register, and the compiler will
only consider the least significant bits of the register as valid
regardless.

Fixes: 47933ad41a86 ("arch: Introduce smp_load_acquire(), smp_store_release()")
Fixes: 878a84d5a8a1 ("arm64: add missing data types in smp_load_acquire/smp_store_release")
Cc: <stable@vger.kernel.org> # 3.14.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/barrier.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 4e0497f581a05..0fe7e43b7fbc2 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -42,25 +42,35 @@
 #define __smp_rmb()	dmb(ishld)
 #define __smp_wmb()	dmb(ishst)
 
-#define __smp_store_release(p, v)						\
+#define __smp_store_release(p, v)					\
 do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (__force typeof(*p)) (v) }; 			\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
 	case 1:								\
 		asm volatile ("stlrb %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u8 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 2:								\
 		asm volatile ("stlrh %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u16 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 4:								\
 		asm volatile ("stlr %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u32 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 8:								\
 		asm volatile ("stlr %1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u64 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	}								\
 } while (0)

From a06040d7a791a9177581dcf7293941bd92400856 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:35 +0100
Subject: [PATCH 07/13] arm64: uaccess: ensure extension of access_ok() addr

Our access_ok() simply hands its arguments over to __range_ok(), which
implicitly assummes that the addr parameter is 64 bits wide. This isn't
necessarily true for compat code, which might pass down a 32-bit address
parameter.

In these cases, we don't have a guarantee that the address has been zero
extended to 64 bits, and the upper bits of the register may contain
unknown values, potentially resulting in a suprious failure.

Avoid this by explicitly casting the addr parameter to an unsigned long
(as is done on other architectures), ensuring that the parameter is
widened appropriately.

Fixes: 0aea86a2176c ("arm64: User access library functions")
Cc: <stable@vger.kernel.org> # 3.7.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/uaccess.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 0221029e27ff7..e6540471dcda7 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -95,11 +95,12 @@ static inline void set_fs(mm_segment_t fs)
  */
 #define __range_ok(addr, size)						\
 ({									\
+	unsigned long __addr = (unsigned long __force)(addr);		\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls"		\
 		: "=&r" (flag), "=&r" (roksum)				\
-		: "1" (addr), "Ir" (size),				\
+		: "1" (__addr), "Ir" (size),				\
 		  "r" (current_thread_info()->addr_limit)		\
 		: "cc");						\
 	flag;								\

From 55de49f9aa17b0b2b144dd2af587177b9aadf429 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:36 +0100
Subject: [PATCH 08/13] arm64: armv8_deprecated: ensure extension of addr

Our compat swp emulation holds the compat user address in an unsigned
int, which it passes to __user_swpX_asm(). When a 32-bit value is passed
in a register, the upper 32 bits of the register are unknown, and we
must extend the value to 64 bits before we can use it as a base address.

This patch casts the address to unsigned long to ensure it has been
suitably extended, avoiding the potential issue, and silencing a related
warning from clang.

Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm")
Cc: <stable@vger.kernel.org> # 3.19.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/armv8_deprecated.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 657977e77ec8f..f0e6d717885b1 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -306,7 +306,8 @@ do {								\
 	_ASM_EXTABLE(0b, 4b)					\
 	_ASM_EXTABLE(1b, 4b)					\
 	: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2)	\
-	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT),		\
+	: "r" ((unsigned long)addr), "i" (-EAGAIN),		\
+	  "i" (-EFAULT),					\
 	  "i" (__SWP_LL_SC_LOOPS)				\
 	: "memory");						\
 	uaccess_disable();					\

From 8997c93452d16aac11d3b0cc53940c94330273a4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:37 +0100
Subject: [PATCH 09/13] arm64: atomic_lse: match asm register sizes

The LSE atomic code uses asm register variables to ensure that
parameters are allocated in specific registers. In the majority of cases
we specifically ask for an x register when using 64-bit values, but in a
couple of cases we use a w regsiter for a 64-bit value.

For asm register variables, the compiler only cares about the register
index, with wN and xN having the same meaning. The compiler determines
the register size to use based on the type of the variable. Thus, this
inconsistency is merely confusing, and not harmful to code generation.

For consistency, this patch updates those cases to use the x register
alias. There should be no functional change as a result of this patch.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/atomic_lse.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 7457ce082b5ff..99fa69c9c3cf3 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -322,7 +322,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)				\
 static inline long atomic64_fetch_and##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
@@ -394,7 +394,7 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)				\
 static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\

From d135b8b5060ea91dd751ff172d179eb4eab1e966 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:38 +0100
Subject: [PATCH 10/13] arm64: uaccess: suppress spurious clang warning

Clang tries to warn when there's a mismatch between an operand's size,
and the size of the register it is held in, as this may indicate a bug.
Specifically, clang warns when the operand's type is less than 64 bits
wide, and the register is used unqualified (i.e. %N rather than %xN or
%wN).

Unfortunately clang can generate these warnings for unreachable code.
For example, for code like:

do {                                            \
        typeof(*(ptr)) __v = (v);               \
        switch(sizeof(*(ptr))) {                \
        case 1:                                 \
                // assume __v is 1 byte wide    \
                asm ("{op}b %w0" : : "r" (v));  \
                break;                          \
        case 8:                                 \
                // assume __v is 8 bytes wide   \
                asm ("{op} %0" : : "r" (v));    \
                break;                          \
        }
while (0)

... if op() were passed a char value and pointer to char, clang may
produce a warning for the unreachable case where sizeof(*(ptr)) is 8.

For the same reasons, clang produces warnings when __put_user_err() is
used for types that are less than 64 bits wide.

We could avoid this with a cast to a fixed-width type in each of the
cases. However, GCC will then warn that pointer types are being cast to
mismatched integer sizes (in unreachable paths).

Another option would be to use the same union trickery as we do for
__smp_store_release() and __smp_load_acquire(), but this is fairly
invasive.

Instead, this patch suppresses the clang warning by using an x modifier
in the assembly for the 8 byte case of __put_user_err(). No additional
work is necessary as the value has been cast to typeof(*(ptr)), so the
compiler will have performed any necessary extension for the reachable
case.

For consistency, __get_user_err() is also updated to use the x modifier
for its 8 byte case.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index e6540471dcda7..90b5755c5003c 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -257,7 +257,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),	\
+		__get_user_asm("ldr", "ldtr", "%x",  __gu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
@@ -324,7 +324,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__put_user_asm("str", "sttr", "%", __pu_val, (ptr),	\
+		__put_user_asm("str", "sttr", "%x", __pu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\

From 03497d761c55438144fd63534d4223418fdfd345 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 27 Apr 2017 11:19:00 -0700
Subject: [PATCH 11/13] mm: Silence vmap() allocation failures based on caller
 gfp_flags

If the caller has set __GFP_NOWARN don't print the following message:
vmap allocation for size 15736832 failed: use vmalloc=<size> to increase
size.

This can happen with the ARM/Linux or ARM64/Linux module loader built
with CONFIG_ARM{,64}_MODULE_PLTS=y which does a first attempt at loading
a large module from module space, then falls back to vmalloc space.

Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0b057628a7ba5..b74f1d01ef76f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -521,7 +521,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 		}
 	}
 
-	if (printk_ratelimit())
+	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
 		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
 			size);
 	kfree(va);

From 75d24d968af8913f641c612930c96acc5399e427 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 27 Apr 2017 11:19:01 -0700
Subject: [PATCH 12/13] ARM: Silence first allocation with
 CONFIG_ARM_MODULE_PLTS=y

When CONFIG_ARM_MODULE_PLTS is enabled, the first allocation using the
module space fails, because the module is too big, and then the module
allocation is attempted from vmalloc space. Silence the first allocation
failure in that case by setting __GFP_NOWARN.

Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/kernel/module.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 80254b47dc342..3ff571c2c71ce 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -40,8 +40,15 @@
 #ifdef CONFIG_MMU
 void *module_alloc(unsigned long size)
 {
-	void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+	gfp_t gfp_mask = GFP_KERNEL;
+	void *p;
+
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
+	p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
 		return p;

From 0c2cf6d9487cb90be6ad7fac66044dfa8e8e5243 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 27 Apr 2017 11:19:02 -0700
Subject: [PATCH 13/13] arm64: Silence first allocation with
 CONFIG_ARM64_MODULE_PLTS=y

When CONFIG_ARM64_MODULE_PLTS is enabled, the first allocation using the
module space fails, because the module is too big, and then the module
allocation is attempted from vmalloc space. Silence the first allocation
failure in that case by setting __GFP_NOWARN.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/module.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index c9a2ab446dc6f..f035ff6fb223f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -32,11 +32,16 @@
 
 void *module_alloc(unsigned long size)
 {
+	gfp_t gfp_mask = GFP_KERNEL;
 	void *p;
 
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
 				module_alloc_base + MODULES_VSIZE,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				gfp_mask, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&