crypto: arm/aes - replace scalar AES cipher
This replaces the scalar AES cipher that originates in the OpenSSL project
with a new implementation that is ~15% (*) faster (on modern cores), and
reuses the lookup tables and the key schedule generation routines from the
generic C implementation (which is usually compiled in anyway due to
networking and other subsystems depending on it).

Note that the bit sliced NEON code for AES still depends on the scalar cipher
that this patch replaces, so it is not removed entirely yet.

* On Cortex-A57, the cipher improves from 17.0 to 14.9 cycles per byte
  for 128-bit keys.
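
For reference, one output column of the forward round can be modelled in C
roughly as follows. This is an illustrative sketch, not code from this patch:
the helper name fround_column is made up, while crypto_ft_tab is the generic
driver's table (declared in <crypto/aes.h>) and rol32 comes from
<linux/bitops.h>. It shows why a single 256-entry table plus rotations can
stand in for the four tables of a fully unrolled T-table implementation,
since crypto_ft_tab[k][x] == rol32(crypto_ft_tab[0][x], 8 * k):

	#include <crypto/aes.h>
	#include <linux/bitops.h>

	/* one column of a forward round: four table lookups rotated into
	 * their byte lanes, xor'ed with one round key word
	 */
	static u32 fround_column(u32 c0, u32 c1, u32 c2, u32 c3, u32 rk)
	{
		return crypto_ft_tab[0][c0 & 0xff] ^
		       rol32(crypto_ft_tab[0][(c1 >> 8) & 0xff], 8) ^
		       rol32(crypto_ft_tab[0][(c2 >> 16) & 0xff], 16) ^
		       rol32(crypto_ft_tab[0][(c3 >> 24) & 0xff], 24) ^
		       rk;
	}

The assembly below expresses the same rotations as ror #24/#16/#8.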

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Ard Biesheuvel authored and Herbert Xu committed Jan 12, 2017
1 parent bed593c commit 81edb42
Showing 5 changed files with 256 additions and 119 deletions.
20 changes: 1 addition & 19 deletions arch/arm/crypto/Kconfig
@@ -62,33 +62,15 @@ config CRYPTO_SHA512_ARM
	  using optimized ARM assembler and NEON, when available.

config CRYPTO_AES_ARM
	tristate "AES cipher algorithms (ARM-asm)"
	depends on ARM
	tristate "Scalar AES cipher for ARM"
	select CRYPTO_ALGAPI
	select CRYPTO_AES
	help
	  Use optimized AES assembler routines for ARM platforms.

	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
	  algorithm.

	  Rijndael appears to be consistently a very good performer in
	  both hardware and software across a wide range of computing
	  environments regardless of its use in feedback or non-feedback
	  modes. Its key setup time is excellent, and its key agility is
	  good. Rijndael's very low memory requirements make it very well
	  suited for restricted-space environments, in which it also
	  demonstrates excellent performance. Rijndael's operations are
	  among the easiest to defend against power and timing attacks.

	  The AES specifies three key sizes: 128, 192 and 256 bits

	  See <http://csrc.nist.gov/encryption/aes/> for more information.

config CRYPTO_AES_ARM_BS
	tristate "Bit sliced AES using NEON instructions"
	depends on KERNEL_MODE_NEON
	select CRYPTO_AES_ARM
	select CRYPTO_BLKCIPHER
	select CRYPTO_SIMD
	help
4 changes: 2 additions & 2 deletions arch/arm/crypto/Makefile
@@ -27,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
endif
endif

aes-arm-y	:= aes-armv4.o aes_glue.o
aes-arm-bs-y	:= aesbs-core.o aesbs-glue.o
aes-arm-y	:= aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y	:= aes-armv4.o aesbs-core.o aesbs-glue.o
sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
179 changes: 179 additions & 0 deletions arch/arm/crypto/aes-cipher-core.S
@@ -0,0 +1,179 @@
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	tt		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3
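	@ t1/t2 alias the in/out argument registers: the input words are
	@ consumed into r4-r7 before the first round, and the output pointer
	@ (pushed as r3 on entry) is reloaded from the stack for the store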

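	@ __select: extract byte \idx of \in; pre-v7 cores lack ubfx, so
	@ the byte is masked in place and __load folds the shift into the
	@ table index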
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

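	@ __load: fetch a 32-bit table entry for a byte produced by __select;
	@ on pre-v7, 'lsr #(8 * \idx) - 2' turns the in-place byte into a
	@ word-scaled table offset within a single addressing mode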
	.macro		__load, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr		\out, [tt, \in, lsr #(8 * \idx) - 2]
	.else
	ldr		\out, [tt, \in, lsl #2]
	.endif
	.endm

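	@ __hround: compute two output columns of a round: each output word
	@ is the xor of four table entries, rotated into their byte lanes,
	@ and one round key word loaded via 'ldm rk!'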
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0
	__load		t0, t0, 1

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0
	__select	t2, \in2, 2
	__load		t1, t1, 1
	__load		t2, t2, 2

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2
	__load		t0, t0, 3
	__load		\t4, \t4, 3

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
	.endm

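	@ __rev: byte swap a word on big-endian kernels; ARMv6+ has 'rev'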
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

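	@ __adrl: materialize the address of \sym, optionally predicated on
	@ \c: movw/movt pair on v7+, literal pool load on older cores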
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

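	@ do_crypt: shared body of the encrypt and decrypt entry points;
	@ \round is fround or iround, \ttab the main table and \ltab the
	@ table for the final round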
	.macro		do_crypt, round, ttab, ltab
	push		{r3-r11, lr}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

	ldm		rk!, {r8-r11}

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		tt, \ttab

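	@ rounds is 10, 12 or 14; the loop below covers 4 rounds per pass,
	@ so when rounds is not a multiple of 4 (10 or 14) skip straight to
	@ label 1 and do only 2 rounds on the first pass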
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

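	@ on the final pass 'subs' drops rounds to zero or below, so the
	@ 'ls' condition retargets tt at the last-round table before the
	@ final \round invocation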
1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	__adrl		tt, \ltab, ls
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	bhi		0b

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
ENDPROC(__aes_arm_encrypt)

ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_il_tab
ENDPROC(__aes_arm_decrypt)
74 changes: 74 additions & 0 deletions arch/arm/crypto/aes-cipher-glue.c
@@ -0,0 +1,74 @@
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>

asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_encrypt);

asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_decrypt);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
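	/* 10, 12 or 14 rounds for 128-, 192- and 256-bit keys (FIPS-197) */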
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
}

static struct crypto_alg aes_alg = {
	.cra_name			= "aes",
	.cra_driver_name		= "aes-arm",
	.cra_priority			= 200,
	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize			= AES_BLOCK_SIZE,
	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
	.cra_module			= THIS_MODULE,

	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
	.cra_cipher.cia_setkey		= crypto_aes_set_key,
	.cra_cipher.cia_encrypt		= aes_encrypt,
	.cra_cipher.cia_decrypt		= aes_decrypt,

#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
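	/* the asm accesses the data with 32-bit ldr/str */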
	.cra_alignmask			= 3,
#endif
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Scalar AES cipher for ARM");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
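
Once registered, the cipher is reachable through the regular crypto API
under the "aes" name. A minimal sketch of a kernel-side caller follows;
this is hypothetical code, not part of this commit, and the allocation
resolves to the highest-priority "aes" implementation, which may or may
not be this driver:

	#include <crypto/aes.h>
	#include <linux/crypto.h>
	#include <linux/err.h>

	static int aes_smoke_test(void)
	{
		static const u8 key[AES_KEYSIZE_128] = {};
		u8 in[AES_BLOCK_SIZE] = {}, out[AES_BLOCK_SIZE];
		struct crypto_cipher *tfm;
		int err;

		/* look up the best "aes" single-block cipher */
		tfm = crypto_alloc_cipher("aes", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_cipher_setkey(tfm, key, sizeof(key));
		if (!err)
			crypto_cipher_encrypt_one(tfm, out, in);

		crypto_free_cipher(tfm);
		return err;
	}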
98 changes: 0 additions & 98 deletions arch/arm/crypto/aes_glue.c

This file was deleted.
