From 4ae73f2d53255c388d50bf83c1681112a6f9cba1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 May 2012 10:14:39 -0700
Subject: [PATCH 1/5] x86: use generic strncpy_from_user routine

The generic strncpy_from_user() is not really optimal, since it is
designed to work on both little-endian and big-endian.  And on
little-endian you can simplify much of the logic to find the first zero
byte, since little-endian arithmetic doesn't have to worry about the
carry bit propagating into earlier bytes (only later bytes, which we
don't care about).

But I have patches to make the generic routines use the architecture-
specific <asm/word-at-a-time.h> infrastructure, so that we can regain
the little-endian optimizations.  But before we do that, switch over to
the generic routines to make the patches each do just one well-defined
thing.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig               |  1 +
 arch/x86/include/asm/uaccess.h |  1 +
 arch/x86/lib/usercopy.c        | 97 ----------------------------------
 3 files changed, 2 insertions(+), 97 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 81c3e8be789a1..3220d44e24d01 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
 	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
+	select GENERIC_STRNCPY_FROM_USER
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 851fe0dc13bc1..1354facd8f636 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -32,6 +32,7 @@
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
 #define __addr_ok(addr)					\
 	((unsigned long __force)(addr) <		\
 	 (current_thread_info()->addr_limit.seg))
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 2e4e4b02c37a6..f61ee67ec00f0 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -43,100 +43,3 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return len;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
-
-/*
- * Do a strncpy, return length of string without final '\0'.
- * 'count' is the user-supplied count (return 'count' if we
- * hit it), 'max' is the address space maximum (and we return
- * -EFAULT if we hit it).
- */
-static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
-{
-	long res = 0;
-
-	/*
-	 * Truncate 'max' to the user-specified limit, so that
-	 * we only have one limit we need to check in the loop
-	 */
-	if (max > count)
-		max = count;
-
-	while (max >= sizeof(unsigned long)) {
-		unsigned long c, mask;
-
-		/* Fall back to byte-at-a-time if we get a page fault */
-		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
-			break;
-		mask = has_zero(c);
-		if (mask) {
-			mask = (mask - 1) & ~mask;
-			mask >>= 7;
-			*(unsigned long *)(dst+res) = c & mask;
-			return res + count_masked_bytes(mask);
-		}
-		*(unsigned long *)(dst+res) = c;
-		res += sizeof(unsigned long);
-		max -= sizeof(unsigned long);
-	}
-
-	while (max) {
-		char c;
-
-		if (unlikely(__get_user(c,src+res)))
-			return -EFAULT;
-		dst[res] = c;
-		if (!c)
-			return res;
-		res++;
-		max--;
-	}
-
-	/*
-	 * Uhhuh. We hit 'max'. But was that the user-specified maximum
-	 * too? If so, that's ok - we got as much as the user asked for.
-	 */
-	if (res >= count)
-		return res;
-
-	/*
-	 * Nope: we hit the address space limit, and we still had more
-	 * characters the caller would have wanted. That's an EFAULT.
-	 */
-	return -EFAULT;
-}
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-long
-strncpy_from_user(char *dst, const char __user *src, long count)
-{
-	unsigned long max_addr, src_addr;
-
-	if (unlikely(count <= 0))
-		return 0;
-
-	max_addr = current_thread_info()->addr_limit.seg;
-	src_addr = (unsigned long)src;
-	if (likely(src_addr < max_addr)) {
-		unsigned long max = max_addr - src_addr;
-		return do_strncpy_from_user(dst, src, count, max);
-	}
-	return -EFAULT;
-}
-EXPORT_SYMBOL(strncpy_from_user);

From 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 May 2012 10:43:17 -0700
Subject: [PATCH 2/5] word-at-a-time: make the interfaces truly generic

This changes the interfaces in <asm/word-at-a-time.h> to be a bit more
complicated, but a lot more generic.

In particular, it allows us to really do the operations efficiently on
both little-endian and big-endian machines, pretty much regardless of
machine details.  For example, if you can rely on a fast population
count instruction on your architecture, this will allow you to make your
optimized <asm/word-at-a-time.h> file with that.

NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is
not truly generic, it actually only works on big-endian.  Why? Because
on little-endian the generic algorithms are wasteful, since you can
inevitably do better. The x86 implementation is an example of that.

(The only truly non-generic part of the asm-generic implementation is
the "find_zero()" function, and you could make a little-endian version
of it.  And if the Kbuild infrastructure allowed us to pick a particular
header file, that would be lovely)

The <asm/word-at-a-time.h> functions are as follows:

 - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm
   uses.

 - has_zero(): take a word, and determine if it has a zero byte in it.
   It gets the word, the pointer to the constant pool, and a pointer to
   an intermediate "data" field it can set.

   This is the "quick-and-dirty" zero tester: it's what is run inside
   the hot loops.

 - "prep_zero_mask()": take the word, the data that has_zero() produced,
   and the constant pool, and generate an *exact* mask of which byte had
   the first zero.  This is run directly *outside* the loop, and allows
   the "has_zero()" function to answer the "is there a zero byte"
   question without necessarily getting exactly *which* byte is the
   first one to contain a zero.

   If you do multiple byte lookups concurrently (eg "hash_name()", which
   looks for both NUL and '/' bytes), after you've done the prep_zero_mask()
   phase, the result of those can be or'ed together to get the "either
   or" case.

 - The result from "prep_zero_mask()" can then be fed into "find_zero()"
   (to find the byte offset of the first byte that was zero) or into
   "zero_bytemask()" (to find the bytemask of the bytes preceding the
   zero byte).

   The existence of zero_bytemask() is optional, and is not necessary
   for the normal string routines.  But dentry name hashing needs it, so
   if you enable DENTRY_WORD_AT_A_TIME you need to expose it.

This changes the generic strncpy_from_user() function and the dentry
hashing functions to use these modified word-at-a-time interfaces.  This
gets us back to the optimized state of the x86 strncpy that we lost in
the previous commit when moving over to the generic version.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/openrisc/include/asm/Kbuild      |  1 +
 arch/sparc/include/asm/Kbuild         |  1 +
 arch/x86/include/asm/word-at-a-time.h | 32 +++++++++++++++--
 fs/namei.c                            | 22 ++++++------
 include/asm-generic/word-at-a-time.h  | 52 +++++++++++++++++++++++++++
 lib/strncpy_from_user.c               | 47 ++++--------------------
 6 files changed, 102 insertions(+), 53 deletions(-)
 create mode 100644 include/asm-generic/word-at-a-time.h

diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index c936483bc8e2a..3f35c38d7b649 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -66,3 +66,4 @@ generic-y += topology.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
+generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 2c2e38821f608..67f83e0a0d68d 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -21,3 +21,4 @@ generic-y += div64.h
 generic-y += local64.h
 generic-y += irq_regs.h
 generic-y += local.h
+generic-y += word-at-a-time.h
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index ae03facfadd6b..5b238981542a2 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -10,6 +10,11 @@
  * bit count instruction, that might be better than the multiply
  * and shift, for example.
  */
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
 
 #ifdef CONFIG_64BIT
 
@@ -37,10 +42,31 @@ static inline long count_masked_bytes(long mask)
 
 #endif
 
-/* Return the high bit set in the first byte that is a zero */
-static inline unsigned long has_zero(unsigned long a)
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
 {
-	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
+	return count_masked_bytes(mask);
 }
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index 93ff12b1a1de0..c651f02c9fecb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1452,7 +1452,8 @@ EXPORT_SYMBOL(full_name_hash);
  */
 static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 {
-	unsigned long a, mask, hash, len;
+	unsigned long a, b, adata, bdata, mask, hash, len;
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 
 	hash = a = 0;
 	len = -sizeof(unsigned long);
@@ -1460,17 +1461,18 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 		hash = (hash + a) * 9;
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
-		/* Do we have any NUL or '/' bytes in this word? */
-		mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));
-	} while (!mask);
-
-	/* The mask *below* the first high bit set */
-	mask = (mask - 1) & ~mask;
-	mask >>= 7;
-	hash += a & mask;
+		b = a ^ REPEAT_BYTE('/');
+	} while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
+
+	adata = prep_zero_mask(a, adata, &constants);
+	bdata = prep_zero_mask(b, bdata, &constants);
+
+	mask = create_zero_mask(adata | bdata);
+
+	hash += a & zero_bytemask(mask);
 	*hashp = fold_hash(hash);
 
-	return len + count_masked_bytes(mask);
+	return len + find_zero(mask);
 }
 
 #else
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
new file mode 100644
index 0000000000000..3f21f1b72e45d
--- /dev/null
+++ b/include/asm-generic/word-at-a-time.h
@@ -0,0 +1,52 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * This says "generic", but it's actually big-endian only.
+ * Little-endian can use more efficient versions of these
+ * interfaces, see for example
+ *	 arch/x86/include/asm/word-at-a-time.h
+ * for those.
+ */
+
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long high_bits, low_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
+
+/* Exact mask: 0x80 in each zero byte of 'val' ('rhs' comes from has_zero()) */
+static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+	unsigned long mask = (val & c->low_bits) + c->low_bits;
+	return ~(mask | rhs);
+}
+
+#define create_zero_mask(mask) (mask)
+
+static inline long find_zero(unsigned long mask)
+{
+	long byte = 0;
+#ifdef CONFIG_64BIT
+	if (mask >> 32)
+		mask >>= 32;
+	else
+		byte = 4;
+#endif
+	if (mask >> 16)
+		mask >>= 16;
+	else
+		byte += 2;
+	return (mask >> 8) ? byte : byte + 1;
+}
+
+static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+	unsigned long rhs = val | c->low_bits;
+	*data = rhs;
+	return (val + c->high_bits) & ~rhs;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index c4c09b0e96bac..bb2b201d6ad03 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -4,37 +4,7 @@
 #include <linux/errno.h>
 
 #include <asm/byteorder.h>
-
-static inline long find_zero(unsigned long mask)
-{
-	long byte = 0;
-
-#ifdef __BIG_ENDIAN
-#ifdef CONFIG_64BIT
-	if (mask >> 32)
-		mask >>= 32;
-	else
-		byte = 4;
-#endif
-	if (mask >> 16)
-		mask >>= 16;
-	else
-		byte += 2;
-	return (mask >> 8) ? byte : byte + 1;
-#else
-#ifdef CONFIG_64BIT
-	if (!((unsigned int) mask)) {
-		mask >>= 32;
-		byte = 4;
-	}
-#endif
-	if (!(mask & 0xffff)) {
-		mask >>= 16;
-		byte += 2;
-	}
-	return (mask & 0xff) ? byte : byte + 1;
-#endif
-}
+#include <asm/word-at-a-time.h>
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #define IS_UNALIGNED(src, dst)	0
@@ -51,8 +21,7 @@ static inline long find_zero(unsigned long mask)
  */
 static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
 {
-	const unsigned long high_bits = REPEAT_BYTE(0xfe) + 1;
-	const unsigned long low_bits = REPEAT_BYTE(0x7f);
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 	long res = 0;
 
 	/*
@@ -66,18 +35,16 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
 		goto byte_at_a_time;
 
 	while (max >= sizeof(unsigned long)) {
-		unsigned long c, v, rhs;
+		unsigned long c, data;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
 		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
 			break;
-		rhs = c | low_bits;
-		v = (c + high_bits) & ~rhs;
 		*(unsigned long *)(dst+res) = c;
-		if (v) {
-			v = (c & low_bits) + low_bits;
-			v = ~(v | rhs);
-			return res + find_zero(v);
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data);
 		}
 		res += sizeof(unsigned long);
 		max -= sizeof(unsigned long);

From a08c5356a3aaf638c41897ae4169de18db89595e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 May 2012 11:06:38 -0700
Subject: [PATCH 3/5] lib: add generic strnlen_user() function

This adds a new generic optimized strnlen_user() function that uses the
<asm/word-at-a-time.h> infrastructure to portably do efficient string
handling.

In many ways, strnlen is much simpler than strncpy, and in particular we
can always pre-align the words we load from memory.  That means that all
the worries about alignment etc are a non-issue, so this one can easily
be used on any architecture.  You obviously do have to provide the
appropriate word-at-a-time.h macros.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig        |   3 +
 lib/Makefile       |   1 +
 lib/strnlen_user.c | 138 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 lib/strnlen_user.c

diff --git a/lib/Kconfig b/lib/Kconfig
index 98230ac3db293..64ddc44d0b81e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -19,6 +19,9 @@ config RATIONAL
 config GENERIC_STRNCPY_FROM_USER
 	bool
 
+config GENERIC_STRNLEN_USER
+	bool
+
 config GENERIC_FIND_FIRST_BIT
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index b98df505f335d..77937a7dd5ce3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_CLZ_TAB) += clz_tab.o
 obj-$(CONFIG_DDR) += jedec_ddr_data.o
 
 obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
+obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
new file mode 100644
index 0000000000000..90900ecfeb542
--- /dev/null
+++ b/lib/strnlen_user.c
@@ -0,0 +1,138 @@
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+
+#include <asm/word-at-a-time.h>
+
+/* Set bits in the first 'n' bytes (memory order) of a loaded long, any long size */
+#ifdef __LITTLE_ENDIAN
+#  define aligned_byte_mask(n) ((1ul << 8*(n))-1)
+#else
+#  define aligned_byte_mask(n) (~0xfful << 8*(sizeof(long)-1-(n)))
+#endif
+
+/*
+ * Do a strnlen, return length of string *with* final '\0'.
+ * 'count' is the user-supplied count, while 'max' is the
+ * address space maximum.
+ *
+ * Return 0 for exceptions (which includes hitting the address
+ * space maximum), or 'count+1' if hitting the user-supplied
+ * maximum count.
+ *
+ * NOTE! The result can exceed 'count' when the NUL is found in
+ * the last aligned 'long' examined. The caller needs to check
+ * the return value against "> count".
+ */
+static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
+{
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+	long align, res = 0;
+	unsigned long c;
+
+	/* Truncate 'max' to 'count' so the loop checks a single limit */
+	if (max > count)
+		max = count;
+
+	/*
+	 * Do everything aligned. But that means that we
+	 * need to also expand the maximum..
+	 */
+	align = (sizeof(long) - 1) & (unsigned long)src;
+	src -= align;
+	max += align;
+
+	if (unlikely(__get_user(c,(unsigned long __user *)src)))
+		return 0;
+	/* Make the bytes before the original 'src' non-zero */
+	c |= aligned_byte_mask(align);
+
+	for (;;) {
+		unsigned long data;
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data) + 1 - align;
+		}
+		res += sizeof(unsigned long);
+		/* Stop if the word just scanned already covered all of 'max' */
+		if (unlikely(max <= sizeof(unsigned long)))
+			break;
+		max -= sizeof(unsigned long);
+		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
+			return 0;
+	}
+	/* Count from the caller's 'src', not the aligned start */
+	res -= align;
+
+	/*
+	 * Uhhuh. We hit 'max'. But was that the user-specified maximum
+	 * too? If so, return the marker for "too long".
+	 */
+	if (res >= count)
+		return count+1;
+
+	/*
+	 * Nope: we hit the address space limit, and we still had more
+	 * characters the caller would have wanted. That's 0.
+	 */
+	return 0;
+}
+
+/**
+ * strnlen_user: - Get the size of a user string INCLUDING final NUL.
+ * @str: The string to measure.
+ * @count: Maximum count (including NUL character)
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * If the string is too long, returns a value greater than @count.
+ * On exception (or invalid count), returns 0.
+ */
+long strnlen_user(const char __user *str, long count)
+{
+	unsigned long max_addr, src_addr;
+
+	if (unlikely(count <= 0))
+		return 0;
+
+	max_addr = user_addr_max();
+	src_addr = (unsigned long)str;
+	if (likely(src_addr < max_addr)) {
+		unsigned long max = max_addr - src_addr;
+		return do_strnlen_user(str, count, max);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(strnlen_user);
+
+/**
+ * strlen_user: - Measure a user-space string, counting its final NUL.
+ * @str: User-space address of the string.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * No length bound is applied beyond the user address limit; if valid
+ * strings have a maximum length, prefer strnlen_user().
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ *
+ * Returns 0 if @str is not below the user address limit, or on
+ * exception.
+ */
+long strlen_user(const char __user *str)
+{
+	const unsigned long start = (unsigned long)str;
+	const unsigned long limit = user_addr_max();
+
+	/* Nothing to measure at or above the user address limit */
+	if (unlikely(start >= limit))
+		return 0;
+
+	/* ~0ul: no caller-supplied count, only the address limit */
+	return do_strnlen_user(str, ~0ul, limit - start);
+}
+EXPORT_SYMBOL(strlen_user);

From 5723aa993d83803157c22327e90cd59e3dcbe879 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 May 2012 11:09:53 -0700
Subject: [PATCH 4/5] x86: use the new generic strnlen_user() function

This throws away the old x86-specific functions in favor of the generic
optimized version.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig                  |  1 +
 arch/x86/include/asm/uaccess.h    |  3 ++
 arch/x86/include/asm/uaccess_32.h | 17 -----------
 arch/x86/include/asm/uaccess_64.h |  3 --
 arch/x86/lib/usercopy_32.c        | 41 --------------------------
 arch/x86/lib/usercopy_64.c        | 48 -------------------------------
 6 files changed, 4 insertions(+), 109 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3220d44e24d01..d700811785ea1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -94,6 +94,7 @@ config X86
 	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1354facd8f636..04cd6882308e5 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -566,6 +566,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
 extern __must_check long
 strncpy_from_user(char *dst, const char __user *src, long count);
 
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 8084bc73b18cb..576e39bca6ad1 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,23 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to,
 	return n;
 }
 
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only.  This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-long strnlen_user(const char __user *str, long n);
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index fcd4b6f3ef02f..8e796fbbf9c66 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,9 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 	}
 }
 
-__must_check long strnlen_user(const char __user *str, long n);
-__must_check long __strnlen_user(const char __user *str, long n);
-__must_check long strlen_user(const char __user *str);
 __must_check unsigned long clear_user(void __user *mem, unsigned long len);
 __must_check unsigned long __clear_user(void __user *mem, unsigned long len);
 
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 883b216c60b2d..1781b2f950e23 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -95,47 +95,6 @@ __clear_user(void __user *to, unsigned long n)
 }
 EXPORT_SYMBOL(__clear_user);
 
-/**
- * strnlen_user: - Get the size of a string in user space.
- * @s: The string to measure.
- * @n: The maximum valid length
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- * If the string is too long, returns a value greater than @n.
- */
-long strnlen_user(const char __user *s, long n)
-{
-	unsigned long mask = -__addr_ok(s);
-	unsigned long res, tmp;
-
-	might_fault();
-
-	__asm__ __volatile__(
-		"	testl %0, %0\n"
-		"	jz 3f\n"
-		"	andl %0,%%ecx\n"
-		"0:	repne; scasb\n"
-		"	setne %%al\n"
-		"	subl %%ecx,%0\n"
-		"	addl %0,%%eax\n"
-		"1:\n"
-		".section .fixup,\"ax\"\n"
-		"2:	xorl %%eax,%%eax\n"
-		"	jmp 1b\n"
-		"3:	movb $1,%%al\n"
-		"	jmp 1b\n"
-		".previous\n"
-		_ASM_EXTABLE(0b,2b)
-		:"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
-		:"0" (n), "1" (s), "2" (0), "3" (mask)
-		:"cc");
-	return res & mask;
-}
-EXPORT_SYMBOL(strnlen_user);
-
 #ifdef CONFIG_X86_INTEL_USERCOPY
 static unsigned long
 __copy_user_intel(void __user *to, const void *from, unsigned long size)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0d0326f388c0b..e5b130bc2d0ef 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -52,54 +52,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
 }
 EXPORT_SYMBOL(clear_user);
 
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-
-long __strnlen_user(const char __user *s, long n)
-{
-	long res = 0;
-	char c;
-
-	while (1) {
-		if (res>n)
-			return n+1;
-		if (__get_user(c, s))
-			return 0;
-		if (!c)
-			return res+1;
-		res++;
-		s++;
-	}
-}
-EXPORT_SYMBOL(__strnlen_user);
-
-long strnlen_user(const char __user *s, long n)
-{
-	if (!access_ok(VERIFY_READ, s, 1))
-		return 0;
-	return __strnlen_user(s, n);
-}
-EXPORT_SYMBOL(strnlen_user);
-
-long strlen_user(const char __user *s)
-{
-	long res = 0;
-	char c;
-
-	for (;;) {
-		if (get_user(c, s))
-			return 0;
-		if (!c)
-			return res+1;
-		res++;
-		s++;
-	}
-}
-EXPORT_SYMBOL(strlen_user);
-
 unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
 {
 	if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { 

From 2c66f623631709aa5f2e4c14c7e089682e7394a3 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sat, 26 May 2012 11:14:27 -0700
Subject: [PATCH 5/5] sparc: use the new generic strnlen_user() function

This throws away the sparc-specific functions in favor of the generic
optimized version.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/Kconfig                  |   1 +
 arch/sparc/include/asm/uaccess_32.h |  22 +-----
 arch/sparc/include/asm/uaccess_64.h |   8 +-
 arch/sparc/lib/Makefile             |   1 -
 arch/sparc/lib/ksyms.c              |   2 -
 arch/sparc/lib/strlen_user_32.S     | 109 ----------------------------
 arch/sparc/lib/strlen_user_64.S     |  97 -------------------------
 7 files changed, 9 insertions(+), 231 deletions(-)
 delete mode 100644 arch/sparc/lib/strlen_user_32.S
 delete mode 100644 arch/sparc/lib/strlen_user_64.S

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 15e9e05740da3..83bd051754e1f 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -35,6 +35,7 @@ config SPARC
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 59586b57ef1a4..53a28dd59f595 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -16,6 +16,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
+
 #define ARCH_HAS_SORT_EXTABLE
 #define ARCH_HAS_SEARCH_EXTABLE
 
@@ -304,24 +306,8 @@ static inline unsigned long clear_user(void __user *addr, unsigned long n)
 		return n;
 }
 
-extern long __strlen_user(const char __user *);
-extern long __strnlen_user(const char __user *, long len);
-
-static inline long strlen_user(const char __user *str)
-{
-	if (!access_ok(VERIFY_READ, str, 0))
-		return 0;
-	else
-		return __strlen_user(str);
-}
-
-static inline long strnlen_user(const char __user *str, long len)
-{
-	if (!access_ok(VERIFY_READ, str, 0))
-		return 0;
-	else
-		return __strnlen_user(str, len);
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 #endif  /* __ASSEMBLY__ */
 
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index dcdfb89cbf3ff..7c831d848b4e1 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -17,6 +17,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
+
 /*
  * Sparc64 is segmented, though more like the M68K than the I386.
  * We use the secondary ASI to address user memory, which references a
@@ -257,11 +259,9 @@ extern unsigned long __must_check __clear_user(void __user *, unsigned long);
 
 #define clear_user __clear_user
 
-extern long __strlen_user(const char __user *);
-extern long __strnlen_user(const char __user *, long len);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
-#define strlen_user __strlen_user
-#define strnlen_user __strnlen_user
 #define __copy_to_user_inatomic ___copy_to_user
 #define __copy_from_user_inatomic ___copy_from_user
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 943d98dc4cdb5..dff4096f3dec0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -10,7 +10,6 @@ lib-y                 += strlen.o
 lib-y                 += checksum_$(BITS).o
 lib-$(CONFIG_SPARC32) += blockops.o
 lib-y                 += memscan_$(BITS).o memcmp.o strncmp_$(BITS).o
-lib-y                 += strlen_user_$(BITS).o
 lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
 lib-$(CONFIG_SPARC32) += copy_user.o locks.o
 lib-$(CONFIG_SPARC64) += atomic_64.o
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 6b278abdb63de..3b31218cafc6c 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -15,8 +15,6 @@
 
 /* string functions */
 EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(__strlen_user);
-EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(strncmp);
 
 /* mem* functions */
diff --git a/arch/sparc/lib/strlen_user_32.S b/arch/sparc/lib/strlen_user_32.S
deleted file mode 100644
index 8c8a371df3c9c..0000000000000
--- a/arch/sparc/lib/strlen_user_32.S
+++ /dev/null
@@ -1,109 +0,0 @@
-/* strlen_user.S: Sparc optimized strlen_user code
- *
- * Return length of string in userspace including terminating 0
- * or 0 for error
- *
- * Copyright (C) 1991,1996 Free Software Foundation
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- */
-
-#define LO_MAGIC 0x01010101
-#define HI_MAGIC 0x80808080
-
-10:
-	ldub	[%o0], %o5
-	cmp	%o5, 0
-	be	1f
-	 add	%o0, 1, %o0
-	andcc	%o0, 3, %g0
-	be	4f
-	 or	%o4, %lo(HI_MAGIC), %o3
-11:
-	ldub	[%o0], %o5
-	cmp	%o5, 0
-	be	2f
-	 add	%o0, 1, %o0
-	andcc	%o0, 3, %g0
-	be	5f
-	 sethi	%hi(LO_MAGIC), %o4
-12:
-	ldub	[%o0], %o5
-	cmp	%o5, 0
-	be	3f
-	 add	%o0, 1, %o0
-	b	13f
-	 or	%o4, %lo(LO_MAGIC), %o2
-1:
-	retl
-	 mov	1, %o0
-2:
-	retl
-	 mov	2, %o0
-3:
-	retl
-	 mov	3, %o0
-
-	.align 4
-	.global __strlen_user, __strnlen_user
-__strlen_user:
-	sethi	%hi(32768), %o1
-__strnlen_user:
-	mov	%o1, %g1
-	mov	%o0, %o1
-	andcc	%o0, 3, %g0
-	bne	10b
-	 sethi	%hi(HI_MAGIC), %o4
-	or	%o4, %lo(HI_MAGIC), %o3
-4:
-	sethi	%hi(LO_MAGIC), %o4
-5:
-	or	%o4, %lo(LO_MAGIC), %o2
-13:
-	ld	[%o0], %o5
-2:
-	sub	%o5, %o2, %o4
-	andcc	%o4, %o3, %g0
-	bne	82f
-	 add	%o0, 4, %o0
-	sub	%o0, %o1, %g2
-81:	cmp	%g2, %g1
-	blu	13b
-	 mov	%o0, %o4
-	ba,a	1f
-
-	/* Check every byte. */
-82:	srl	%o5, 24, %g5
-	andcc	%g5, 0xff, %g0
-	be	1f
-	 add	%o0, -3, %o4
-	srl	%o5, 16, %g5
-	andcc	%g5, 0xff, %g0
-	be	1f
-	 add	%o4, 1, %o4
-	srl	%o5, 8, %g5
-	andcc	%g5, 0xff, %g0
-	be	1f
-	 add	%o4, 1, %o4
-	andcc	%o5, 0xff, %g0
-	bne	81b
-	 sub	%o0, %o1, %g2
-
-	add	%o4, 1, %o4
-1:
-	retl
-	 sub	%o4, %o1, %o0
-
-	.section .fixup,#alloc,#execinstr
-	.align	4
-9:
-	retl
-	 clr	%o0
-
-	.section __ex_table,#alloc
-	.align	4
-
-	.word	10b, 9b
-	.word	11b, 9b
-	.word	12b, 9b
-	.word	13b, 9b
diff --git a/arch/sparc/lib/strlen_user_64.S b/arch/sparc/lib/strlen_user_64.S
deleted file mode 100644
index c3df71fa49285..0000000000000
--- a/arch/sparc/lib/strlen_user_64.S
+++ /dev/null
@@ -1,97 +0,0 @@
-/* strlen_user.S: Sparc64 optimized strlen_user code
- *
- * Return length of string in userspace including terminating 0
- * or 0 for error
- *
- * Copyright (C) 1991,1996 Free Software Foundation
- * Copyright (C) 1996,1999 David S. Miller (davem@redhat.com)
- * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- */
-
-#include <linux/linkage.h>
-#include <asm/asi.h>
-
-#define LO_MAGIC 0x01010101
-#define HI_MAGIC 0x80808080
-
-	.align 4
-ENTRY(__strlen_user)
-	sethi	%hi(32768), %o1
-ENTRY(__strnlen_user)
-	mov	%o1, %g1
-	mov	%o0, %o1
-	andcc	%o0, 3, %g0
-	be,pt	%icc, 9f
-	 sethi	%hi(HI_MAGIC), %o4
-10:	lduba	[%o0] %asi, %o5
-	brz,pn	%o5, 21f
-	 add	%o0, 1, %o0
-	andcc	%o0, 3, %g0
-	be,pn	%icc, 4f
-	 or	%o4, %lo(HI_MAGIC), %o3
-11:	lduba	[%o0] %asi, %o5
-	brz,pn	%o5, 22f
-	 add	%o0, 1, %o0
-	andcc	%o0, 3, %g0
-	be,pt	%icc, 13f
-	 srl	%o3, 7, %o2
-12:	lduba	[%o0] %asi, %o5
-	brz,pn	%o5, 23f
-	 add	%o0, 1, %o0
-	ba,pt	%icc, 2f
-15:	 lda	[%o0] %asi, %o5
-9:	or	%o4, %lo(HI_MAGIC), %o3
-4:	srl	%o3, 7, %o2
-13:	lda	[%o0] %asi, %o5
-2:	sub	%o5, %o2, %o4
-	andcc	%o4, %o3, %g0
-	bne,pn	%icc, 82f
-	 add	%o0, 4, %o0
-	sub	%o0, %o1, %g2
-81:	cmp	%g2, %g1
-	blu,pt	%icc, 13b
-	 mov	%o0, %o4
-	ba,a,pt	%xcc, 1f
-
-	/* Check every byte. */
-82:	srl	%o5, 24, %g7
-	andcc	%g7, 0xff, %g0
-	be,pn	%icc, 1f
-	 add	%o0, -3, %o4
-	srl	%o5, 16, %g7
-	andcc	%g7, 0xff, %g0
-	be,pn	%icc, 1f
-	 add	%o4, 1, %o4
-	srl	%o5, 8, %g7
-	andcc	%g7, 0xff, %g0
-	be,pn	%icc, 1f
-	 add	%o4, 1, %o4
-	andcc	%o5, 0xff, %g0
-	bne,pt	%icc, 81b
-	 sub	%o0, %o1, %g2
-	add	%o4, 1, %o4
-1:	retl
-	 sub	%o4, %o1, %o0
-21:	retl
-	 mov	1, %o0
-22:	retl
-	 mov	2, %o0
-23:	retl
-	 mov	3, %o0
-ENDPROC(__strlen_user)
-ENDPROC(__strnlen_user)
-
-        .section .fixup,#alloc,#execinstr
-        .align  4
-30:
-        retl
-         clr    %o0
-
-	.section __ex_table,"a"
-	.align	4
-
-	.word	10b, 30b
-	.word	11b, 30b
-	.word	12b, 30b
-	.word	15b, 30b
-	.word	13b, 30b