diff --git a/ChangeLog b/ChangeLog
index 76d5e82388..591bfc07ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,42 @@
+2012-11-13  David S. Miller  <davem@davemloft.net>
+
+	* crypt/Makefile: Move test targets after toplevel Rules
+	inclusion.  Grab any necessary sysdep routines when linking.
+	* crypt/md5.c (md5_process_block): Remove define, we will always
+	name it __md5_process_block.
+	(md5_finish_ctx): Update md5_process_block call.
+	(md5_stream): Likewise.
+	(md5_process_bytes): Likewise.
+	(md5_process_block): Rename to __md5_process_block and move to ...
+	* crypt/md5-block.c: ... here.
+	* crypt/sha256.c (sha256_process_block): Move to ...
+	* crypt/sha256-block.c: ... here.
+	* crypt/sha512.c (sha512_process_block): Move to ...
+	* crypt/sha512-block.c: ... here.
+	* locale/Makefile (CFLAGS-md5.c): Define to add crypt/ to include
+	path.
+	* sysdeps/sparc/sparc-ifunc.h (sparc_libc_ifunc): Define.
+	* sysdeps/sparc/sparc64/multiarch/Makefile
+	(libcrypt-sysdep_routines): Add crypto assembler sysdeps when in
+	crypt subdir.
+	(localedef-aux): Add md5 crypto assembler when in locale subdir.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Mirror sparc64
+	multiarch changes.
+	* sysdeps/sparc/sparc64/multiarch/md5-block.c: New file.
+	* sysdeps/sparc/sparc64/multiarch/md5-crop.S: New file.
+	* sysdeps/sparc/sparc64/multiarch/sha256-block.c: New file.
+	* sysdeps/sparc/sparc64/multiarch/sha256-crop.S: New file.
+	* sysdeps/sparc/sparc64/multiarch/sha512-block.c: New file.
+	* sysdeps/sparc/sparc64/multiarch/sha512-crop.S: New file.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c: New file.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S: New file.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c: New
+	file.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S: New file.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c: New
+	file.
+	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S: New file.
+
 2012-11-13  Joseph Myers  <joseph@codesourcery.com>
 
 	* timezone/tzselect.ksh: Update from tzcode git revision
diff --git a/crypt/Makefile b/crypt/Makefile
index 3d4f243ed5..54f0021a23 100644
--- a/crypt/Makefile
+++ b/crypt/Makefile
@@ -49,15 +49,21 @@ tests += md5test sha256test sha512test
 # The test md5test-giant uses up to 400 MB of RSS and runs on a fast
 # machine over a minute.
 xtests = md5test-giant
-
-$(objpfx)md5test: $(objpfx)md5.o
-$(objpfx)md5test-giant: $(objpfx)md5.o
-$(objpfx)sha256test: $(objpfx)sha256.o
-$(objpfx)sha512test: $(objpfx)sha512.o
 endif
 
 include ../Rules
 
+ifneq ($(nss-crypt),yes)
+md5-routines := md5 $(filter md5%,$(libcrypt-sysdep_routines))
+sha256-routines := sha256 $(filter sha256%,$(libcrypt-sysdep_routines))
+sha512-routines := sha512 $(filter sha512%,$(libcrypt-sysdep_routines))
+
+$(objpfx)md5test: $(patsubst %, $(objpfx)%.o,$(md5-routines))
+$(objpfx)md5test-giant: $(patsubst %, $(objpfx)%.o,$(md5-routines))
+$(objpfx)sha256test: $(patsubst %, $(objpfx)%.o,$(sha256-routines))
+$(objpfx)sha512test: $(patsubst %, $(objpfx)%.o,$(sha512-routines))
+endif
+
 ifeq (yes,$(build-shared))
 $(addprefix $(objpfx),$(tests)): $(objpfx)libcrypt.so
 else
diff --git a/crypt/md5-block.c b/crypt/md5-block.c
new file mode 100644
index 0000000000..35e99addec
--- /dev/null
+++ b/crypt/md5-block.c
@@ -0,0 +1,166 @@
+/* These are the four functions used in the four steps of the MD5 algorithm
+   and defined in RFC 1321.  The first function is a little bit optimized
+   (as found in Colin Plumb's public domain implementation).  */
+/* #define FF(b, c, d) ((b & c) | (~b & d)) */
+#define FF(b, c, d) (d ^ (b & (c ^ d)))
+#define FG(b, c, d) FF (d, b, c)
+#define FH(b, c, d) (b ^ c ^ d)
+#define FI(b, c, d) (c ^ (b | ~d))
+
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+   It is assumed that LEN % 64 == 0.  */
+
+void
+__md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx)
+{
+  md5_uint32 correct_words[16];
+  const md5_uint32 *words = buffer;
+  size_t nwords = len / sizeof (md5_uint32);
+  const md5_uint32 *endp = words + nwords;
+  md5_uint32 A = ctx->A;
+  md5_uint32 B = ctx->B;
+  md5_uint32 C = ctx->C;
+  md5_uint32 D = ctx->D;
+  md5_uint32 lolen = len;
+
+  /* First increment the byte count.  RFC 1321 specifies the possible
+     length of the file up to 2^64 bits.  Here we only compute the
+     number of bytes.  Do a double word increment.  */
+  ctx->total[0] += lolen;
+  ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);
+
+  /* Process all bytes in the buffer with 64 bytes in each round of
+     the loop.  */
+  while (words < endp)
+    {
+      md5_uint32 *cwp = correct_words;
+      md5_uint32 A_save = A;
+      md5_uint32 B_save = B;
+      md5_uint32 C_save = C;
+      md5_uint32 D_save = D;
+
+      /* First round: using the given function, the context and a constant
+	 the next context is computed.  Because the algorithm's processing
+	 unit is a 32-bit word and it is defined to work on words in
+	 little endian byte order we perhaps have to change the byte order
+	 before the computation.  To reduce the work for the next steps
+	 we store the swapped words in the array CORRECT_WORDS.  */
+
+#define OP(a, b, c, d, s, T)						\
+      do								\
+	{								\
+	  a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T;		\
+	  ++words;							\
+	  CYCLIC (a, s);						\
+	  a += b;							\
+	}								\
+      while (0)
+
+      /* It is unfortunate that C does not provide an operator for
+	 cyclic rotation.  Hope the C compiler is smart enough.  */
+#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
+
+      /* Before we start, a word about the strange constants.
+	 They are defined in RFC 1321 as
+
+	 T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
+       */
+
+      /* Round 1.  */
+      OP (A, B, C, D,  7, 0xd76aa478);
+      OP (D, A, B, C, 12, 0xe8c7b756);
+      OP (C, D, A, B, 17, 0x242070db);
+      OP (B, C, D, A, 22, 0xc1bdceee);
+      OP (A, B, C, D,  7, 0xf57c0faf);
+      OP (D, A, B, C, 12, 0x4787c62a);
+      OP (C, D, A, B, 17, 0xa8304613);
+      OP (B, C, D, A, 22, 0xfd469501);
+      OP (A, B, C, D,  7, 0x698098d8);
+      OP (D, A, B, C, 12, 0x8b44f7af);
+      OP (C, D, A, B, 17, 0xffff5bb1);
+      OP (B, C, D, A, 22, 0x895cd7be);
+      OP (A, B, C, D,  7, 0x6b901122);
+      OP (D, A, B, C, 12, 0xfd987193);
+      OP (C, D, A, B, 17, 0xa679438e);
+      OP (B, C, D, A, 22, 0x49b40821);
+
+      /* For the second to fourth round we have the possibly swapped words
+	 in CORRECT_WORDS.  Redefine the macro to take an additional first
+	 argument specifying the function to use.  */
+#undef OP
+#define OP(f, a, b, c, d, k, s, T)					\
+      do 								\
+	{								\
+	  a += f (b, c, d) + correct_words[k] + T;			\
+	  CYCLIC (a, s);						\
+	  a += b;							\
+	}								\
+      while (0)
+
+      /* Round 2.  */
+      OP (FG, A, B, C, D,  1,  5, 0xf61e2562);
+      OP (FG, D, A, B, C,  6,  9, 0xc040b340);
+      OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
+      OP (FG, B, C, D, A,  0, 20, 0xe9b6c7aa);
+      OP (FG, A, B, C, D,  5,  5, 0xd62f105d);
+      OP (FG, D, A, B, C, 10,  9, 0x02441453);
+      OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
+      OP (FG, B, C, D, A,  4, 20, 0xe7d3fbc8);
+      OP (FG, A, B, C, D,  9,  5, 0x21e1cde6);
+      OP (FG, D, A, B, C, 14,  9, 0xc33707d6);
+      OP (FG, C, D, A, B,  3, 14, 0xf4d50d87);
+      OP (FG, B, C, D, A,  8, 20, 0x455a14ed);
+      OP (FG, A, B, C, D, 13,  5, 0xa9e3e905);
+      OP (FG, D, A, B, C,  2,  9, 0xfcefa3f8);
+      OP (FG, C, D, A, B,  7, 14, 0x676f02d9);
+      OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
+
+      /* Round 3.  */
+      OP (FH, A, B, C, D,  5,  4, 0xfffa3942);
+      OP (FH, D, A, B, C,  8, 11, 0x8771f681);
+      OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
+      OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
+      OP (FH, A, B, C, D,  1,  4, 0xa4beea44);
+      OP (FH, D, A, B, C,  4, 11, 0x4bdecfa9);
+      OP (FH, C, D, A, B,  7, 16, 0xf6bb4b60);
+      OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
+      OP (FH, A, B, C, D, 13,  4, 0x289b7ec6);
+      OP (FH, D, A, B, C,  0, 11, 0xeaa127fa);
+      OP (FH, C, D, A, B,  3, 16, 0xd4ef3085);
+      OP (FH, B, C, D, A,  6, 23, 0x04881d05);
+      OP (FH, A, B, C, D,  9,  4, 0xd9d4d039);
+      OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
+      OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
+      OP (FH, B, C, D, A,  2, 23, 0xc4ac5665);
+
+      /* Round 4.  */
+      OP (FI, A, B, C, D,  0,  6, 0xf4292244);
+      OP (FI, D, A, B, C,  7, 10, 0x432aff97);
+      OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
+      OP (FI, B, C, D, A,  5, 21, 0xfc93a039);
+      OP (FI, A, B, C, D, 12,  6, 0x655b59c3);
+      OP (FI, D, A, B, C,  3, 10, 0x8f0ccc92);
+      OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
+      OP (FI, B, C, D, A,  1, 21, 0x85845dd1);
+      OP (FI, A, B, C, D,  8,  6, 0x6fa87e4f);
+      OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
+      OP (FI, C, D, A, B,  6, 15, 0xa3014314);
+      OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
+      OP (FI, A, B, C, D,  4,  6, 0xf7537e82);
+      OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
+      OP (FI, C, D, A, B,  2, 15, 0x2ad7d2bb);
+      OP (FI, B, C, D, A,  9, 21, 0xeb86d391);
+
+      /* Add the starting values of the context.  */
+      A += A_save;
+      B += B_save;
+      C += C_save;
+      D += D_save;
+    }
+
+  /* Put checksum in context given as argument.  */
+  ctx->A = A;
+  ctx->B = B;
+  ctx->C = C;
+  ctx->D = D;
+}
diff --git a/crypt/md5.c b/crypt/md5.c
index 3d2e79b905..16f3cda79c 100644
--- a/crypt/md5.c
+++ b/crypt/md5.c
@@ -44,7 +44,6 @@
 /* We need to keep the namespace clean so define the MD5 function
    protected using leading __ .  */
 # define md5_init_ctx __md5_init_ctx
-# define md5_process_block __md5_process_block
 # define md5_process_bytes __md5_process_bytes
 # define md5_finish_ctx __md5_finish_ctx
 # define md5_read_ctx __md5_read_ctx
@@ -126,7 +125,7 @@ md5_finish_ctx (ctx, resbuf)
 					       (ctx->total[0] >> 29));
 
   /* Process last bytes.  */
-  md5_process_block (ctx->buffer, bytes + pad + 8, ctx);
+  __md5_process_block (ctx->buffer, bytes + pad + 8, ctx);
 
   return md5_read_ctx (ctx, resbuf);
 }
@@ -175,7 +174,7 @@ md5_stream (stream, resblock)
       /* Process buffer with BLOCKSIZE bytes.  Note that
 			BLOCKSIZE % 64 == 0
        */
-      md5_process_block (buffer, BLOCKSIZE, &ctx);
+      __md5_process_block (buffer, BLOCKSIZE, &ctx);
     }
 
   /* Add the last bytes if necessary.  */
@@ -228,7 +227,7 @@ md5_process_bytes (buffer, len, ctx)
 
       if (ctx->buflen > 64)
 	{
-	  md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
+	  __md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
 
 	  ctx->buflen &= 63;
 	  /* The regions in the following copy operation cannot overlap.  */
@@ -254,14 +253,14 @@ md5_process_bytes (buffer, len, ctx)
       if (UNALIGNED_P (buffer))
 	while (len > 64)
 	  {
-	    md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
+	    __md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
 	    buffer = (const char *) buffer + 64;
 	    len -= 64;
 	  }
       else
 #endif
 	{
-	  md5_process_block (buffer, len & ~63, ctx);
+	  __md5_process_block (buffer, len & ~63, ctx);
 	  buffer = (const char *) buffer + (len & ~63);
 	  len &= 63;
 	}
@@ -276,7 +275,7 @@ md5_process_bytes (buffer, len, ctx)
       left_over += len;
       if (left_over >= 64)
 	{
-	  md5_process_block (ctx->buffer, 64, ctx);
+	  __md5_process_block (ctx->buffer, 64, ctx);
 	  left_over -= 64;
 	  memcpy (ctx->buffer, &ctx->buffer[64], left_over);
 	}
@@ -284,173 +283,4 @@ md5_process_bytes (buffer, len, ctx)
     }
 }
 
-
-/* These are the four functions used in the four steps of the MD5 algorithm
-   and defined in the RFC 1321.  The first function is a little bit optimized
-   (as found in Colin Plumbs public domain implementation).  */
-/* #define FF(b, c, d) ((b & c) | (~b & d)) */
-#define FF(b, c, d) (d ^ (b & (c ^ d)))
-#define FG(b, c, d) FF (d, b, c)
-#define FH(b, c, d) (b ^ c ^ d)
-#define FI(b, c, d) (c ^ (b | ~d))
-
-/* Process LEN bytes of BUFFER, accumulating context into CTX.
-   It is assumed that LEN % 64 == 0.  */
-
-void
-md5_process_block (buffer, len, ctx)
-     const void *buffer;
-     size_t len;
-     struct md5_ctx *ctx;
-{
-  md5_uint32 correct_words[16];
-  const md5_uint32 *words = buffer;
-  size_t nwords = len / sizeof (md5_uint32);
-  const md5_uint32 *endp = words + nwords;
-  md5_uint32 A = ctx->A;
-  md5_uint32 B = ctx->B;
-  md5_uint32 C = ctx->C;
-  md5_uint32 D = ctx->D;
-  md5_uint32 lolen = len;
-
-  /* First increment the byte count.  RFC 1321 specifies the possible
-     length of the file up to 2^64 bits.  Here we only compute the
-     number of bytes.  Do a double word increment.  */
-  ctx->total[0] += lolen;
-  ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);
-
-  /* Process all bytes in the buffer with 64 bytes in each round of
-     the loop.  */
-  while (words < endp)
-    {
-      md5_uint32 *cwp = correct_words;
-      md5_uint32 A_save = A;
-      md5_uint32 B_save = B;
-      md5_uint32 C_save = C;
-      md5_uint32 D_save = D;
-
-      /* First round: using the given function, the context and a constant
-	 the next context is computed.  Because the algorithms processing
-	 unit is a 32-bit word and it is determined to work on words in
-	 little endian byte order we perhaps have to change the byte order
-	 before the computation.  To reduce the work for the next steps
-	 we store the swapped words in the array CORRECT_WORDS.  */
-
-#define OP(a, b, c, d, s, T)						\
-      do								\
-	{								\
-	  a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T;		\
-	  ++words;							\
-	  CYCLIC (a, s);						\
-	  a += b;							\
-	}								\
-      while (0)
-
-      /* It is unfortunate that C does not provide an operator for
-	 cyclic rotation.  Hope the C compiler is smart enough.  */
-#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
-
-      /* Before we start, one word to the strange constants.
-	 They are defined in RFC 1321 as
-
-	 T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
-       */
-
-      /* Round 1.  */
-      OP (A, B, C, D,  7, 0xd76aa478);
-      OP (D, A, B, C, 12, 0xe8c7b756);
-      OP (C, D, A, B, 17, 0x242070db);
-      OP (B, C, D, A, 22, 0xc1bdceee);
-      OP (A, B, C, D,  7, 0xf57c0faf);
-      OP (D, A, B, C, 12, 0x4787c62a);
-      OP (C, D, A, B, 17, 0xa8304613);
-      OP (B, C, D, A, 22, 0xfd469501);
-      OP (A, B, C, D,  7, 0x698098d8);
-      OP (D, A, B, C, 12, 0x8b44f7af);
-      OP (C, D, A, B, 17, 0xffff5bb1);
-      OP (B, C, D, A, 22, 0x895cd7be);
-      OP (A, B, C, D,  7, 0x6b901122);
-      OP (D, A, B, C, 12, 0xfd987193);
-      OP (C, D, A, B, 17, 0xa679438e);
-      OP (B, C, D, A, 22, 0x49b40821);
-
-      /* For the second to fourth round we have the possibly swapped words
-	 in CORRECT_WORDS.  Redefine the macro to take an additional first
-	 argument specifying the function to use.  */
-#undef OP
-#define OP(f, a, b, c, d, k, s, T)					\
-      do 								\
-	{								\
-	  a += f (b, c, d) + correct_words[k] + T;			\
-	  CYCLIC (a, s);						\
-	  a += b;							\
-	}								\
-      while (0)
-
-      /* Round 2.  */
-      OP (FG, A, B, C, D,  1,  5, 0xf61e2562);
-      OP (FG, D, A, B, C,  6,  9, 0xc040b340);
-      OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
-      OP (FG, B, C, D, A,  0, 20, 0xe9b6c7aa);
-      OP (FG, A, B, C, D,  5,  5, 0xd62f105d);
-      OP (FG, D, A, B, C, 10,  9, 0x02441453);
-      OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
-      OP (FG, B, C, D, A,  4, 20, 0xe7d3fbc8);
-      OP (FG, A, B, C, D,  9,  5, 0x21e1cde6);
-      OP (FG, D, A, B, C, 14,  9, 0xc33707d6);
-      OP (FG, C, D, A, B,  3, 14, 0xf4d50d87);
-      OP (FG, B, C, D, A,  8, 20, 0x455a14ed);
-      OP (FG, A, B, C, D, 13,  5, 0xa9e3e905);
-      OP (FG, D, A, B, C,  2,  9, 0xfcefa3f8);
-      OP (FG, C, D, A, B,  7, 14, 0x676f02d9);
-      OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
-
-      /* Round 3.  */
-      OP (FH, A, B, C, D,  5,  4, 0xfffa3942);
-      OP (FH, D, A, B, C,  8, 11, 0x8771f681);
-      OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
-      OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
-      OP (FH, A, B, C, D,  1,  4, 0xa4beea44);
-      OP (FH, D, A, B, C,  4, 11, 0x4bdecfa9);
-      OP (FH, C, D, A, B,  7, 16, 0xf6bb4b60);
-      OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
-      OP (FH, A, B, C, D, 13,  4, 0x289b7ec6);
-      OP (FH, D, A, B, C,  0, 11, 0xeaa127fa);
-      OP (FH, C, D, A, B,  3, 16, 0xd4ef3085);
-      OP (FH, B, C, D, A,  6, 23, 0x04881d05);
-      OP (FH, A, B, C, D,  9,  4, 0xd9d4d039);
-      OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
-      OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
-      OP (FH, B, C, D, A,  2, 23, 0xc4ac5665);
-
-      /* Round 4.  */
-      OP (FI, A, B, C, D,  0,  6, 0xf4292244);
-      OP (FI, D, A, B, C,  7, 10, 0x432aff97);
-      OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
-      OP (FI, B, C, D, A,  5, 21, 0xfc93a039);
-      OP (FI, A, B, C, D, 12,  6, 0x655b59c3);
-      OP (FI, D, A, B, C,  3, 10, 0x8f0ccc92);
-      OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
-      OP (FI, B, C, D, A,  1, 21, 0x85845dd1);
-      OP (FI, A, B, C, D,  8,  6, 0x6fa87e4f);
-      OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
-      OP (FI, C, D, A, B,  6, 15, 0xa3014314);
-      OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
-      OP (FI, A, B, C, D,  4,  6, 0xf7537e82);
-      OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
-      OP (FI, C, D, A, B,  2, 15, 0x2ad7d2bb);
-      OP (FI, B, C, D, A,  9, 21, 0xeb86d391);
-
-      /* Add the starting values of the context.  */
-      A += A_save;
-      B += B_save;
-      C += C_save;
-      D += D_save;
-    }
-
-  /* Put checksum in context given as argument.  */
-  ctx->A = A;
-  ctx->B = B;
-  ctx->C = C;
-  ctx->D = D;
-}
+#include <md5-block.c>
diff --git a/crypt/sha256-block.c b/crypt/sha256-block.c
new file mode 100644
index 0000000000..a163e25865
--- /dev/null
+++ b/crypt/sha256-block.c
@@ -0,0 +1,96 @@
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+   It is assumed that LEN % 64 == 0.  */
+void
+sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx)
+{
+  const uint32_t *words = buffer;
+  size_t nwords = len / sizeof (uint32_t);
+  uint32_t a = ctx->H[0];
+  uint32_t b = ctx->H[1];
+  uint32_t c = ctx->H[2];
+  uint32_t d = ctx->H[3];
+  uint32_t e = ctx->H[4];
+  uint32_t f = ctx->H[5];
+  uint32_t g = ctx->H[6];
+  uint32_t h = ctx->H[7];
+
+  /* First increment the byte count.  FIPS 180-2 specifies the possible
+     length of the file up to 2^64 bits.  Here we only compute the
+     number of bytes.  */
+  ctx->total64 += len;
+
+  /* Process all bytes in the buffer with 64 bytes in each round of
+     the loop.  */
+  while (nwords > 0)
+    {
+      uint32_t W[64];
+      uint32_t a_save = a;
+      uint32_t b_save = b;
+      uint32_t c_save = c;
+      uint32_t d_save = d;
+      uint32_t e_save = e;
+      uint32_t f_save = f;
+      uint32_t g_save = g;
+      uint32_t h_save = h;
+
+      /* Operators defined in FIPS 180-2:4.1.2.  */
+#define Ch(x, y, z) ((x & y) ^ (~x & z))
+#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
+#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22))
+#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25))
+#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3))
+#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10))
+
+      /* It is unfortunate that C does not provide an operator for
+	 cyclic rotation.  Hope the C compiler is smart enough.  */
+#define CYCLIC(w, s) ((w >> s) | (w << (32 - s)))
+
+      /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
+      for (unsigned int t = 0; t < 16; ++t)
+	{
+	  W[t] = SWAP (*words);
+	  ++words;
+	}
+      for (unsigned int t = 16; t < 64; ++t)
+	W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
+
+      /* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
+      for (unsigned int t = 0; t < 64; ++t)
+	{
+	  uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
+	  uint32_t T2 = S0 (a) + Maj (a, b, c);
+	  h = g;
+	  g = f;
+	  f = e;
+	  e = d + T1;
+	  d = c;
+	  c = b;
+	  b = a;
+	  a = T1 + T2;
+	}
+
+      /* Add the starting values of the context according to FIPS 180-2:6.2.2
+	 step 4.  */
+      a += a_save;
+      b += b_save;
+      c += c_save;
+      d += d_save;
+      e += e_save;
+      f += f_save;
+      g += g_save;
+      h += h_save;
+
+      /* Prepare for the next round.  */
+      nwords -= 16;
+    }
+
+  /* Put checksum in context given as argument.  */
+  ctx->H[0] = a;
+  ctx->H[1] = b;
+  ctx->H[2] = c;
+  ctx->H[3] = d;
+  ctx->H[4] = e;
+  ctx->H[5] = f;
+  ctx->H[6] = g;
+  ctx->H[7] = h;
+}
diff --git a/crypt/sha256.c b/crypt/sha256.c
index 61be6bce25..aea9465139 100644
--- a/crypt/sha256.c
+++ b/crypt/sha256.c
@@ -80,104 +80,8 @@ static const uint32_t K[64] =
     0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
   };
 
-
-/* Process LEN bytes of BUFFER, accumulating context into CTX.
-   It is assumed that LEN % 64 == 0.  */
-static void
-sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx)
-{
-  const uint32_t *words = buffer;
-  size_t nwords = len / sizeof (uint32_t);
-  uint32_t a = ctx->H[0];
-  uint32_t b = ctx->H[1];
-  uint32_t c = ctx->H[2];
-  uint32_t d = ctx->H[3];
-  uint32_t e = ctx->H[4];
-  uint32_t f = ctx->H[5];
-  uint32_t g = ctx->H[6];
-  uint32_t h = ctx->H[7];
-
-  /* First increment the byte count.  FIPS 180-2 specifies the possible
-     length of the file up to 2^64 bits.  Here we only compute the
-     number of bytes.  */
-  ctx->total64 += len;
-
-  /* Process all bytes in the buffer with 64 bytes in each round of
-     the loop.  */
-  while (nwords > 0)
-    {
-      uint32_t W[64];
-      uint32_t a_save = a;
-      uint32_t b_save = b;
-      uint32_t c_save = c;
-      uint32_t d_save = d;
-      uint32_t e_save = e;
-      uint32_t f_save = f;
-      uint32_t g_save = g;
-      uint32_t h_save = h;
-
-      /* Operators defined in FIPS 180-2:4.1.2.  */
-#define Ch(x, y, z) ((x & y) ^ (~x & z))
-#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
-#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22))
-#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25))
-#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3))
-#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10))
-
-      /* It is unfortunate that C does not provide an operator for
-	 cyclic rotation.  Hope the C compiler is smart enough.  */
-#define CYCLIC(w, s) ((w >> s) | (w << (32 - s)))
-
-      /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
-      for (unsigned int t = 0; t < 16; ++t)
-	{
-	  W[t] = SWAP (*words);
-	  ++words;
-	}
-      for (unsigned int t = 16; t < 64; ++t)
-	W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
-
-      /* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
-      for (unsigned int t = 0; t < 64; ++t)
-	{
-	  uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
-	  uint32_t T2 = S0 (a) + Maj (a, b, c);
-	  h = g;
-	  g = f;
-	  f = e;
-	  e = d + T1;
-	  d = c;
-	  c = b;
-	  b = a;
-	  a = T1 + T2;
-	}
-
-      /* Add the starting values of the context according to FIPS 180-2:6.2.2
-	 step 4.  */
-      a += a_save;
-      b += b_save;
-      c += c_save;
-      d += d_save;
-      e += e_save;
-      f += f_save;
-      g += g_save;
-      h += h_save;
-
-      /* Prepare for the next round.  */
-      nwords -= 16;
-    }
-
-  /* Put checksum in context given as argument.  */
-  ctx->H[0] = a;
-  ctx->H[1] = b;
-  ctx->H[2] = c;
-  ctx->H[3] = d;
-  ctx->H[4] = e;
-  ctx->H[5] = f;
-  ctx->H[6] = g;
-  ctx->H[7] = h;
-}
-
+void
+sha256_process_block (const void *, size_t, struct sha256_ctx *);
 
 /* Initialize structure containing state of computation.
    (FIPS 180-2:5.3.2)  */
@@ -312,3 +216,5 @@ __sha256_process_bytes (buffer, len, ctx)
       ctx->buflen = left_over;
     }
 }
+
+#include <sha256-block.c>
diff --git a/crypt/sha512-block.c b/crypt/sha512-block.c
new file mode 100644
index 0000000000..e7c5cfd7f5
--- /dev/null
+++ b/crypt/sha512-block.c
@@ -0,0 +1,103 @@
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+   It is assumed that LEN % 128 == 0.  */
+void
+sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
+{
+  const uint64_t *words = buffer;
+  size_t nwords = len / sizeof (uint64_t);
+  uint64_t a = ctx->H[0];
+  uint64_t b = ctx->H[1];
+  uint64_t c = ctx->H[2];
+  uint64_t d = ctx->H[3];
+  uint64_t e = ctx->H[4];
+  uint64_t f = ctx->H[5];
+  uint64_t g = ctx->H[6];
+  uint64_t h = ctx->H[7];
+
+  /* First increment the byte count.  FIPS 180-2 specifies the possible
+     length of the file up to 2^128 bits.  Here we only compute the
+     number of bytes.  Do a double word increment.  */
+#ifdef USE_TOTAL128
+  ctx->total128 += len;
+#else
+  uint64_t lolen = len;
+  ctx->total[TOTAL128_low] += lolen;
+  ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2)
+				+ (ctx->total[TOTAL128_low] < lolen));
+#endif
+
+  /* Process all bytes in the buffer with 128 bytes in each round of
+     the loop.  */
+  while (nwords > 0)
+    {
+      uint64_t W[80];
+      uint64_t a_save = a;
+      uint64_t b_save = b;
+      uint64_t c_save = c;
+      uint64_t d_save = d;
+      uint64_t e_save = e;
+      uint64_t f_save = f;
+      uint64_t g_save = g;
+      uint64_t h_save = h;
+
+      /* Operators defined in FIPS 180-2:4.1.2.  */
+#define Ch(x, y, z) ((x & y) ^ (~x & z))
+#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
+#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39))
+#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41))
+#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7))
+#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6))
+
+      /* It is unfortunate that C does not provide an operator for
+	 cyclic rotation.  Hope the C compiler is smart enough.  */
+#define CYCLIC(w, s) ((w >> s) | (w << (64 - s)))
+
+      /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
+      for (unsigned int t = 0; t < 16; ++t)
+	{
+	  W[t] = SWAP (*words);
+	  ++words;
+	}
+      for (unsigned int t = 16; t < 80; ++t)
+	W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
+
+      /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
+      for (unsigned int t = 0; t < 80; ++t)
+	{
+	  uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
+	  uint64_t T2 = S0 (a) + Maj (a, b, c);
+	  h = g;
+	  g = f;
+	  f = e;
+	  e = d + T1;
+	  d = c;
+	  c = b;
+	  b = a;
+	  a = T1 + T2;
+	}
+
+      /* Add the starting values of the context according to FIPS 180-2:6.3.2
+	 step 4.  */
+      a += a_save;
+      b += b_save;
+      c += c_save;
+      d += d_save;
+      e += e_save;
+      f += f_save;
+      g += g_save;
+      h += h_save;
+
+      /* Prepare for the next round.  */
+      nwords -= 16;
+    }
+
+  /* Put checksum in context given as argument.  */
+  ctx->H[0] = a;
+  ctx->H[1] = b;
+  ctx->H[2] = c;
+  ctx->H[3] = d;
+  ctx->H[4] = e;
+  ctx->H[5] = f;
+  ctx->H[6] = g;
+  ctx->H[7] = h;
+}
diff --git a/crypt/sha512.c b/crypt/sha512.c
index 0675c948c1..c0df12e6f0 100644
--- a/crypt/sha512.c
+++ b/crypt/sha512.c
@@ -100,111 +100,8 @@ static const uint64_t K[80] =
     UINT64_C (0x5fcb6fab3ad6faec), UINT64_C (0x6c44198c4a475817)
   };
 
-
-/* Process LEN bytes of BUFFER, accumulating context into CTX.
-   It is assumed that LEN % 128 == 0.  */
-static void
-sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
-{
-  const uint64_t *words = buffer;
-  size_t nwords = len / sizeof (uint64_t);
-  uint64_t a = ctx->H[0];
-  uint64_t b = ctx->H[1];
-  uint64_t c = ctx->H[2];
-  uint64_t d = ctx->H[3];
-  uint64_t e = ctx->H[4];
-  uint64_t f = ctx->H[5];
-  uint64_t g = ctx->H[6];
-  uint64_t h = ctx->H[7];
-
-  /* First increment the byte count.  FIPS 180-2 specifies the possible
-     length of the file up to 2^128 bits.  Here we only compute the
-     number of bytes.  Do a double word increment.  */
-#ifdef USE_TOTAL128
-  ctx->total128 += len;
-#else
-  uint64_t lolen = len;
-  ctx->total[TOTAL128_low] += lolen;
-  ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2)
-				+ (ctx->total[TOTAL128_low] < lolen));
-#endif
-
-  /* Process all bytes in the buffer with 128 bytes in each round of
-     the loop.  */
-  while (nwords > 0)
-    {
-      uint64_t W[80];
-      uint64_t a_save = a;
-      uint64_t b_save = b;
-      uint64_t c_save = c;
-      uint64_t d_save = d;
-      uint64_t e_save = e;
-      uint64_t f_save = f;
-      uint64_t g_save = g;
-      uint64_t h_save = h;
-
-      /* Operators defined in FIPS 180-2:4.1.2.  */
-#define Ch(x, y, z) ((x & y) ^ (~x & z))
-#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
-#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39))
-#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41))
-#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7))
-#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6))
-
-      /* It is unfortunate that C does not provide an operator for
-	 cyclic rotation.  Hope the C compiler is smart enough.  */
-#define CYCLIC(w, s) ((w >> s) | (w << (64 - s)))
-
-      /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
-      for (unsigned int t = 0; t < 16; ++t)
-	{
-	  W[t] = SWAP (*words);
-	  ++words;
-	}
-      for (unsigned int t = 16; t < 80; ++t)
-	W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
-
-      /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
-      for (unsigned int t = 0; t < 80; ++t)
-	{
-	  uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
-	  uint64_t T2 = S0 (a) + Maj (a, b, c);
-	  h = g;
-	  g = f;
-	  f = e;
-	  e = d + T1;
-	  d = c;
-	  c = b;
-	  b = a;
-	  a = T1 + T2;
-	}
-
-      /* Add the starting values of the context according to FIPS 180-2:6.3.2
-	 step 4.  */
-      a += a_save;
-      b += b_save;
-      c += c_save;
-      d += d_save;
-      e += e_save;
-      f += f_save;
-      g += g_save;
-      h += h_save;
-
-      /* Prepare for the next round.  */
-      nwords -= 16;
-    }
-
-  /* Put checksum in context given as argument.  */
-  ctx->H[0] = a;
-  ctx->H[1] = b;
-  ctx->H[2] = c;
-  ctx->H[3] = d;
-  ctx->H[4] = e;
-  ctx->H[5] = f;
-  ctx->H[6] = g;
-  ctx->H[7] = h;
-}
-
+void
+sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx);
 
 /* Initialize structure containing state of computation.
    (FIPS 180-2:5.3.3)  */
@@ -342,3 +239,5 @@ __sha512_process_bytes (buffer, len, ctx)
       ctx->buflen = left_over;
     }
 }
+
+#include <sha512-block.c>
diff --git a/locale/Makefile b/locale/Makefile
index 42c6772ceb..84f3c930ba 100644
--- a/locale/Makefile
+++ b/locale/Makefile
@@ -59,6 +59,8 @@ GPERFFLAGS = -acCgopt -k1,2,5,9,$$ -L ANSI-C
 
 include ../Rules
 
+CFLAGS-md5.c = -I../crypt
+
 programs/%-kw.h: programs/%-kw.gperf
 	cd programs \
 	&& $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $(<F) > $(@F).new
diff --git a/sysdeps/sparc/sparc-ifunc.h b/sysdeps/sparc/sparc-ifunc.h
index db53a7126e..7de7e51538 100644
--- a/sysdeps/sparc/sparc-ifunc.h
+++ b/sysdeps/sparc/sparc-ifunc.h
@@ -109,4 +109,6 @@ END (__##name)
   }									\
   __asm__ (".type " #name ", %gnu_indirect_function");
 
+# define sparc_libc_ifunc(name, expr) sparc_libm_ifunc (name, expr)
+
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
index 7358bdb167..4ad7aff914 100644
--- a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
@@ -1,3 +1,11 @@
+ifeq ($(subdir),crypt)
+libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop
+endif
+
+ifeq ($(subdir),locale)
+localedef-aux += md5-crop
+endif
+
 ifeq ($(subdir),string)
 sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
 		   memset-niagara1 memcpy-niagara4 memset-niagara4
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c
new file mode 100644
index 0000000000..3765cabae7
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c
@@ -0,0 +1 @@
+#include <sparc64/multiarch/md5-block.c>
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S
new file mode 100644
index 0000000000..11a3a81482
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S
@@ -0,0 +1 @@
+#include <sparc64/multiarch/md5-crop.S>
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c
new file mode 100644
index 0000000000..600c602b61
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c
@@ -0,0 +1 @@
+#include <sparc64/multiarch/sha256-block.c>
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S
new file mode 100644
index 0000000000..4895405853
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S
@@ -0,0 +1 @@
+#include <sparc64/multiarch/sha256-crop.S>
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c
new file mode 100644
index 0000000000..7c7c54e5a6
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c
@@ -0,0 +1 @@
+#include <sparc64/multiarch/sha512-block.c>
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S
new file mode 100644
index 0000000000..cc74a99d3c
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S
@@ -0,0 +1 @@
+#include <sparc64/multiarch/sha512-crop.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile
index 7358bdb167..4ad7aff914 100644
--- a/sysdeps/sparc/sparc64/multiarch/Makefile
+++ b/sysdeps/sparc/sparc64/multiarch/Makefile
@@ -1,3 +1,11 @@
+ifeq ($(subdir),crypt)
+libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop
+endif
+
+ifeq ($(subdir),locale)
+localedef-aux += md5-crop
+endif
+
 ifeq ($(subdir),string)
 sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
 		   memset-niagara1 memcpy-niagara4 memset-niagara4
diff --git a/sysdeps/sparc/sparc64/multiarch/md5-block.c b/sysdeps/sparc/sparc64/multiarch/md5-block.c
new file mode 100644
index 0000000000..7c1a3a368f
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/md5-block.c
@@ -0,0 +1,29 @@
+#include <sparc-ifunc.h>
+
+#define  __md5_process_block __md5_process_block_generic
+extern void __md5_process_block_generic (const void *buffer, size_t len,
+					 struct md5_ctx *ctx);
+
+#include <crypt/md5-block.c>
+
+#undef __md5_process_block
+
+extern void __md5_process_block_crop (const void *buffer, size_t len,
+				      struct md5_ctx *ctx);
+static bool cpu_supports_md5(int hwcap)
+{
+  unsigned long crypto_features;
+
+  /* Without the crypto hwcap bit the %asr26 read below is skipped.  */
+  if ((hwcap & HWCAP_SPARC_CRYPTO) == 0)
+    return false;
+
+  __asm__ ("rd %%asr26, %0" : "=r" (crypto_features));
+
+  /* Bit 4 of the value read from %asr26 is the one tested for MD5.  */
+  return (crypto_features & (1 << 4)) != 0;
+}
+
+extern void __md5_process_block (const void *buffer, size_t len,
+				 struct md5_ctx *ctx);
+sparc_libc_ifunc(__md5_process_block, cpu_supports_md5(hwcap) ? __md5_process_block_crop : __md5_process_block_generic);
diff --git a/sysdeps/sparc/sparc64/multiarch/md5-crop.S b/sysdeps/sparc/sparc64/multiarch/md5-crop.S
new file mode 100644
index 0000000000..702dda4c26
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/md5-crop.S
@@ -0,0 +1,110 @@
+/* MD5 using sparc crypto opcodes.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@davemloft.net)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define ASI_PL 0x88
+
+#define MD5		\
+	.word	0x81b02800;
+
+	.text
+	.align	32
+ENTRY(__md5_process_block_crop)
+	/* %o0=buffer, %o1=len, %o2=CTX */
+	ld	[%o2 + 0x10], %g1	/* 32-bit word at CTX+0x10 (presumably the low byte count -- confirm) */
+	add	%g1, %o1, %o4		/* ... plus len */
+	st	%o4, [%o2 + 0x10]
+	clr	%o5
+	cmp	%o4, %g1
+	movlu	%icc, 1, %o5		/* %o5 = 1 when the 32-bit sum wrapped */
+#ifdef __arch64__
+	srlx	%o1, 32, %o4		/* 64-bit len: add its upper 32 bits to the carry */
+	add	%o5, %o4, %o5
+#endif
+	ld	[%o2 + 0x14], %o4	/* word at CTX+0x14 += carry */
+	add	%o4, %o5, %o4
+	st	%o4, [%o2 + 0x14]
+	lda	[%o2] ASI_PL, %f0	/* CTX words 0..3 -> %f0-%f3 via ASI_PL (0x88, SPARC V9 primary little-endian ASI) */
+	add	%o2, 0x4, %g1
+	lda	[%g1] ASI_PL, %f1
+	add	%o2, 0x8, %g1
+	andcc	%o0, 0x7, %g0		/* is the buffer 8-byte aligned? */
+	lda	[%g1] ASI_PL, %f2
+	add	%o2, 0xc, %g1
+	bne,pn	%xcc, 10f		/* no: use the realigning loop at 10: */
+	 lda	[%g1] ASI_PL, %f3	/* delay slot, executed on both paths */
+
+1:	/* Aligned loop: one 64-byte block per iteration.  */
+	ldd	[%o0 + 0x00], %f8
+	ldd	[%o0 + 0x08], %f10
+	ldd	[%o0 + 0x10], %f12
+	ldd	[%o0 + 0x18], %f14
+	ldd	[%o0 + 0x20], %f16
+	ldd	[%o0 + 0x28], %f18
+	ldd	[%o0 + 0x30], %f20
+	ldd	[%o0 + 0x38], %f22
+
+	MD5				/* hand-encoded opcode (.word 0x81b02800) */
+
+	subcc	%o1, 64, %o1		/* len -= 64 */
+	bne,pt	%xcc, 1b		/* loop until len reaches zero */
+	 add	%o0, 0x40, %o0		/* delay slot: advance buffer by 64 */
+
+5:	/* Write %f0-%f3 back to CTX words 0..3 and return.  */
+	sta	%f0, [%o2] ASI_PL
+	add	%o2, 0x4, %g1
+	sta	%f1, [%g1] ASI_PL
+	add	%o2, 0x8, %g1
+	sta	%f2, [%g1] ASI_PL
+	add	%o2, 0xc, %g1
+	retl
+	 sta	%f3, [%g1] ASI_PL	/* delay slot: last store */
+10:	/* Unaligned buffer.  */
+	alignaddr %o0, %g0, %o0		/* round %o0 down to 8 bytes; offset is kept in GSR for faligndata */
+
+	ldd	[%o0 + 0x00], %f10	/* first aligned doubleword, loaded once before the loop */
+1:
+	ldd	[%o0 + 0x08], %f12
+	ldd	[%o0 + 0x10], %f14
+	ldd	[%o0 + 0x18], %f16
+	ldd	[%o0 + 0x20], %f18
+	ldd	[%o0 + 0x28], %f20
+	ldd	[%o0 + 0x30], %f22
+	ldd	[%o0 + 0x38], %f24
+	ldd	[%o0 + 0x40], %f26	/* ninth doubleword: supplies the tail of this block */
+
+	faligndata %f10, %f12, %f8	/* extract 8 realigned doublewords into %f8-%f22 */
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	MD5				/* hand-encoded opcode (.word 0x81b02800) */
+
+	subcc	%o1, 64, %o1		/* len -= 64 */
+	fsrc2	%f26, %f10		/* carry the last loaded doubleword into the next iteration */
+	bne,pt	%xcc, 1b
+	 add	%o0, 0x40, %o0		/* delay slot: advance buffer by 64 */
+
+	ba,a,pt	%xcc, 5b		/* done: share the store-and-return code at 5: */
+END(__md5_process_block_crop)
diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-block.c b/sysdeps/sparc/sparc64/multiarch/sha256-block.c
new file mode 100644
index 0000000000..79966b93d7
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/sha256-block.c
@@ -0,0 +1,30 @@
+#include <sparc-ifunc.h>
+
+#define sha256_process_block sha256_process_block_generic
+extern void sha256_process_block_generic (const void *buffer, size_t len,
+					  struct sha256_ctx *ctx);
+
+#include <crypt/sha256-block.c>
+
+#undef sha256_process_block
+
+extern void __sha256_process_block_crop (const void *buffer, size_t len,
+					 struct sha256_ctx *ctx);
+
+static bool cpu_supports_sha256(int hwcap)
+{
+  unsigned long crypto_features;
+
+  /* Without the crypto hwcap bit the %asr26 read below is skipped.  */
+  if ((hwcap & HWCAP_SPARC_CRYPTO) == 0)
+    return false;
+
+  __asm__ ("rd %%asr26, %0" : "=r" (crypto_features));
+
+  /* Bit 6 of the value read from %asr26 is the one tested for SHA256.  */
+  return (crypto_features & (1 << 6)) != 0;
+}
+
+extern void sha256_process_block (const void *buffer, size_t len,
+				  struct sha256_ctx *ctx);
+sparc_libc_ifunc(sha256_process_block, cpu_supports_sha256(hwcap) ? __sha256_process_block_crop : sha256_process_block_generic);
diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S
new file mode 100644
index 0000000000..b79f536c48
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S
@@ -0,0 +1,101 @@
+/* SHA256 using sparc crypto opcodes.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@davemloft.net)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define SHA256		\
+	.word	0x81b02840;
+
+	.text
+	.align	32
+ENTRY(__sha256_process_block_crop)
+	/* %o0=buffer, %o1=len, %o2=CTX */
+	ldx	[%o2 + 0x20], %g1	/* 64-bit counter at CTX+0x20 += len; NOTE(review): confirm this matches the C layout of the byte count */
+	add	%g1, %o1, %g1
+	stx	%g1, [%o2 + 0x20]
+
+	ld	[%o2 + 0x00], %f0	/* CTX words 0..7 -> %f0-%f7 */
+	ld	[%o2 + 0x04], %f1
+	ld	[%o2 + 0x08], %f2
+	ld	[%o2 + 0x0c], %f3
+	ld	[%o2 + 0x10], %f4
+	ld	[%o2 + 0x14], %f5
+	andcc	%o0, 0x7, %g0		/* test the BUFFER for 8-byte alignment (was %o1, i.e. len) */
+	ld	[%o2 + 0x18], %f6
+	bne,pn	%xcc, 10f		/* unaligned: use the realigning loop at 10: */
+	 ld	[%o2 + 0x1c], %f7	/* delay slot, executed on both paths */
+
+1:	/* Aligned loop: one 64-byte block per iteration.  */
+	ldd	[%o0 + 0x00], %f8
+	ldd	[%o0 + 0x08], %f10
+	ldd	[%o0 + 0x10], %f12
+	ldd	[%o0 + 0x18], %f14
+	ldd	[%o0 + 0x20], %f16
+	ldd	[%o0 + 0x28], %f18
+	ldd	[%o0 + 0x30], %f20
+	ldd	[%o0 + 0x38], %f22
+
+	SHA256				/* hand-encoded opcode (.word 0x81b02840) */
+
+	subcc	%o1, 0x40, %o1		/* len -= 64 */
+	bne,pt	%xcc, 1b		/* loop until len reaches zero */
+	 add	%o0, 0x40, %o0		/* delay slot: advance buffer by 64 */
+
+5:	/* Write %f0-%f7 back to CTX words 0..7 and return.  */
+	st	%f0, [%o2 + 0x00]
+	st	%f1, [%o2 + 0x04]
+	st	%f2, [%o2 + 0x08]
+	st	%f3, [%o2 + 0x0c]
+	st	%f4, [%o2 + 0x10]
+	st	%f5, [%o2 + 0x14]
+	st	%f6, [%o2 + 0x18]
+	retl
+	 st	%f7, [%o2 + 0x1c]	/* delay slot: last store */
+10:	/* Unaligned buffer.  */
+	alignaddr %o0, %g0, %o0		/* round %o0 down to 8 bytes; offset is kept in GSR for faligndata */
+
+	ldd	[%o0 + 0x00], %f10	/* first aligned doubleword, loaded once before the loop */
+1:
+	ldd	[%o0 + 0x08], %f12
+	ldd	[%o0 + 0x10], %f14
+	ldd	[%o0 + 0x18], %f16
+	ldd	[%o0 + 0x20], %f18
+	ldd	[%o0 + 0x28], %f20
+	ldd	[%o0 + 0x30], %f22
+	ldd	[%o0 + 0x38], %f24
+	ldd	[%o0 + 0x40], %f26	/* ninth doubleword: supplies the tail of this block */
+
+	faligndata %f10, %f12, %f8	/* extract 8 realigned doublewords into %f8-%f22 */
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	SHA256				/* hand-encoded opcode (.word 0x81b02840) */
+
+	subcc	%o1, 0x40, %o1		/* len -= 64 */
+	fsrc2	%f26, %f10		/* carry the last loaded doubleword into the next iteration */
+	bne,pt	%xcc, 1b
+	 add	%o0, 0x40, %o0		/* delay slot: advance buffer by 64 */
+
+	ba,a,pt	%xcc, 5b		/* done: share the store-and-return code at 5: */
+END(__sha256_process_block_crop)
diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-block.c b/sysdeps/sparc/sparc64/multiarch/sha512-block.c
new file mode 100644
index 0000000000..0d1c3dd6d8
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/sha512-block.c
@@ -0,0 +1,30 @@
+#include <sparc-ifunc.h>
+
+#define sha512_process_block sha512_process_block_generic
+extern void sha512_process_block_generic (const void *buffer, size_t len,
+					  struct sha512_ctx *ctx);
+
+#include <crypt/sha512-block.c>
+
+#undef sha512_process_block
+
+extern void __sha512_process_block_crop (const void *buffer, size_t len,
+					 struct sha512_ctx *ctx);
+
+static bool cpu_supports_sha512(int hwcap)
+{
+  unsigned long cfr;
+
+  /* No crypto hwcap bit: do not touch %asr26 at all.  */
+  if (!(hwcap & HWCAP_SPARC_CRYPTO))
+    return false;
+
+  __asm__ ("rd %%asr26, %0" : "=r" (cfr));
+
+  /* SHA512 is CFR bit 7 (0x80); bit 6 (0x40) only advertises SHA256.  */
+  return (cfr & (1 << 7)) != 0;
+}
+
+extern void sha512_process_block (const void *buffer, size_t len,
+				  struct sha512_ctx *ctx);
+sparc_libc_ifunc(sha512_process_block, cpu_supports_sha512(hwcap) ? __sha512_process_block_crop : sha512_process_block_generic);
diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S
new file mode 100644
index 0000000000..efd8ae3cde
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S
@@ -0,0 +1,131 @@
+/* SHA512 using sparc crypto opcodes.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@davemloft.net)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define SHA512		\
+	.word	0x81b02860;
+
+	.text
+	.align	32
+ENTRY(__sha512_process_block_crop)
+	/* %o0=buffer, %o1=len, %o2=CTX */
+	ldx	[%o2 + 0x48], %g1	/* 64-bit word at CTX+0x48 += len */
+	add	%g1, %o1, %o4
+	stx	%o4, [%o2 + 0x48]
+	cmp	%o4, %g1
+	bgeu,pt	%xcc, 1f		/* no wrap: skip the carry */
+	 nop
+	ldx	[%o2 + 0x40], %g1	/* wrapped: increment the 64-bit word at CTX+0x40; NOTE(review): confirm low/high order vs. the C struct */
+	add	%g1, 1, %g1
+	stx	%g1, [%o2 + 0x40]
+
+1:	ldd	[%o2 + 0x00], %f0	/* CTX doublewords 0..7 -> %f0-%f14 */
+	ldd	[%o2 + 0x08], %f2
+	ldd	[%o2 + 0x10], %f4
+	ldd	[%o2 + 0x18], %f6
+	ldd	[%o2 + 0x20], %f8
+	ldd	[%o2 + 0x28], %f10
+	andcc	%o0, 0x7, %g0		/* test the BUFFER for 8-byte alignment (was %o1, i.e. len) */
+	ldd	[%o2 + 0x30], %f12
+	bne,pn	%xcc, 10f		/* unaligned: use the realigning loop at 10: */
+	 ldd	[%o2 + 0x38], %f14	/* delay slot, executed on both paths */
+
+1:	/* Aligned loop: one 128-byte block per iteration.  */
+	ldd	[%o0 + 0x00], %f16
+	ldd	[%o0 + 0x08], %f18
+	ldd	[%o0 + 0x10], %f20
+	ldd	[%o0 + 0x18], %f22
+	ldd	[%o0 + 0x20], %f24
+	ldd	[%o0 + 0x28], %f26
+	ldd	[%o0 + 0x30], %f28
+	ldd	[%o0 + 0x38], %f30
+	ldd	[%o0 + 0x40], %f32
+	ldd	[%o0 + 0x48], %f34
+	ldd	[%o0 + 0x50], %f36
+	ldd	[%o0 + 0x58], %f38
+	ldd	[%o0 + 0x60], %f40
+	ldd	[%o0 + 0x68], %f42
+	ldd	[%o0 + 0x70], %f44
+	ldd	[%o0 + 0x78], %f46
+
+	SHA512				/* hand-encoded opcode (.word 0x81b02860) */
+
+	subcc	%o1, 0x80, %o1		/* len -= 128 */
+	bne,pt	%xcc, 1b		/* loop until len reaches zero */
+	 add	%o0, 0x80, %o0		/* delay slot: advance buffer by 128 */
+
+5:	/* Write %f0-%f14 back to CTX doublewords 0..7 and return.  */
+	std	%f0, [%o2 + 0x00]
+	std	%f2, [%o2 + 0x08]
+	std	%f4, [%o2 + 0x10]
+	std	%f6, [%o2 + 0x18]
+	std	%f8, [%o2 + 0x20]
+	std	%f10, [%o2 + 0x28]
+	std	%f12, [%o2 + 0x30]
+	retl
+	 std	%f14, [%o2 + 0x38]	/* delay slot: last store */
+10:	/* Unaligned buffer.  */
+	alignaddr %o0, %g0, %o0		/* round %o0 down to 8 bytes; offset is kept in GSR for faligndata */
+
+	ldd	[%o0 + 0x00], %f18	/* first aligned doubleword, loaded once before the loop */
+1:
+	ldd	[%o0 + 0x08], %f20
+	ldd	[%o0 + 0x10], %f22
+	ldd	[%o0 + 0x18], %f24
+	ldd	[%o0 + 0x20], %f26
+	ldd	[%o0 + 0x28], %f28
+	ldd	[%o0 + 0x30], %f30
+	ldd	[%o0 + 0x38], %f32
+	ldd	[%o0 + 0x40], %f34
+	ldd	[%o0 + 0x48], %f36
+	ldd	[%o0 + 0x50], %f38
+	ldd	[%o0 + 0x58], %f40
+	ldd	[%o0 + 0x60], %f42
+	ldd	[%o0 + 0x68], %f44
+	ldd	[%o0 + 0x70], %f46
+	ldd	[%o0 + 0x78], %f48
+	ldd	[%o0 + 0x80], %f50	/* seventeenth doubleword: supplies the tail of this block */
+
+	faligndata %f18, %f20, %f16	/* extract 16 realigned doublewords into %f16-%f46 */
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+	faligndata %f26, %f28, %f24
+	faligndata %f28, %f30, %f26
+	faligndata %f30, %f32, %f28
+	faligndata %f32, %f34, %f30
+	faligndata %f34, %f36, %f32
+	faligndata %f36, %f38, %f34
+	faligndata %f38, %f40, %f36
+	faligndata %f40, %f42, %f38
+	faligndata %f42, %f44, %f40
+	faligndata %f44, %f46, %f42
+	faligndata %f46, %f48, %f44
+	faligndata %f48, %f50, %f46
+
+	SHA512				/* hand-encoded opcode (.word 0x81b02860) */
+
+	subcc	%o1, 0x80, %o1		/* len -= 128 */
+	fsrc2	%f50, %f18		/* carry the last loaded doubleword into the next iteration */
+	bne,pt	%xcc, 1b
+	 add	%o0, 0x80, %o0		/* delay slot: advance buffer by 128 */
+
+	ba,a,pt	%xcc, 5b		/* done: share the store-and-return code at 5: */
+END(__sha512_process_block_crop)