-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
crypto: arm64/aes - add scalar implementation
This adds a scalar implementation of AES, based on the precomputed tables that are exposed by the generic AES code. Since rotates are cheap on arm64, this implementation only uses the 4 core tables (of 1 KB each), and avoids the prerotated ones, reducing the D-cache footprint by 75%. On Cortex-A57, this code manages 13.0 cycles per byte, which is ~34% faster than the generic C code. (Note that this is still >13x slower than the code that uses the optional ARMv8 Crypto Extensions, which manages <1 cycles per byte.) Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
- Loading branch information
Ard Biesheuvel
authored and
Herbert Xu
committed
Jan 12, 2017
1 parent
293614c
commit bed593c
Showing
4 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* | ||
* Scalar AES core transform | ||
* | ||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License version 2 as | ||
* published by the Free Software Foundation. | ||
*/ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/assembler.h> | ||
|
||
.text | ||
|
||
rk .req x0 | ||
out .req x1 | ||
in .req x2 | ||
rounds .req x3 | ||
tt .req x4 | ||
lt .req x2 | ||
|
||
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc | ||
ldp \out0, \out1, [rk], #8 | ||
|
||
ubfx w13, \in0, #0, #8 | ||
ubfx w14, \in1, #8, #8 | ||
ldr w13, [tt, w13, uxtw #2] | ||
ldr w14, [tt, w14, uxtw #2] | ||
|
||
.if \enc | ||
ubfx w17, \in1, #0, #8 | ||
ubfx w18, \in2, #8, #8 | ||
.else | ||
ubfx w17, \in3, #0, #8 | ||
ubfx w18, \in0, #8, #8 | ||
.endif | ||
ldr w17, [tt, w17, uxtw #2] | ||
ldr w18, [tt, w18, uxtw #2] | ||
|
||
ubfx w15, \in2, #16, #8 | ||
ubfx w16, \in3, #24, #8 | ||
ldr w15, [tt, w15, uxtw #2] | ||
ldr w16, [tt, w16, uxtw #2] | ||
|
||
.if \enc | ||
ubfx \t0, \in3, #16, #8 | ||
ubfx \t1, \in0, #24, #8 | ||
.else | ||
ubfx \t0, \in1, #16, #8 | ||
ubfx \t1, \in2, #24, #8 | ||
.endif | ||
ldr \t0, [tt, \t0, uxtw #2] | ||
ldr \t1, [tt, \t1, uxtw #2] | ||
|
||
eor \out0, \out0, w13 | ||
eor \out1, \out1, w17 | ||
eor \out0, \out0, w14, ror #24 | ||
eor \out1, \out1, w18, ror #24 | ||
eor \out0, \out0, w15, ror #16 | ||
eor \out1, \out1, \t0, ror #16 | ||
eor \out0, \out0, w16, ror #8 | ||
eor \out1, \out1, \t1, ror #8 | ||
.endm | ||
|
||
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3 | ||
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 | ||
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 | ||
.endm | ||
|
||
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3 | ||
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 | ||
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 | ||
.endm | ||
|
||
.macro do_crypt, round, ttab, ltab | ||
ldp w5, w6, [in] | ||
ldp w7, w8, [in, #8] | ||
ldp w9, w10, [rk], #16 | ||
ldp w11, w12, [rk, #-8] | ||
|
||
CPU_BE( rev w5, w5 ) | ||
CPU_BE( rev w6, w6 ) | ||
CPU_BE( rev w7, w7 ) | ||
CPU_BE( rev w8, w8 ) | ||
|
||
eor w5, w5, w9 | ||
eor w6, w6, w10 | ||
eor w7, w7, w11 | ||
eor w8, w8, w12 | ||
|
||
ldr tt, =\ttab | ||
ldr lt, =\ltab | ||
|
||
tbnz rounds, #1, 1f | ||
|
||
0: \round w9, w10, w11, w12, w5, w6, w7, w8 | ||
\round w5, w6, w7, w8, w9, w10, w11, w12 | ||
|
||
1: subs rounds, rounds, #4 | ||
\round w9, w10, w11, w12, w5, w6, w7, w8 | ||
csel tt, tt, lt, hi | ||
\round w5, w6, w7, w8, w9, w10, w11, w12 | ||
b.hi 0b | ||
|
||
CPU_BE( rev w5, w5 ) | ||
CPU_BE( rev w6, w6 ) | ||
CPU_BE( rev w7, w7 ) | ||
CPU_BE( rev w8, w8 ) | ||
|
||
stp w5, w6, [out] | ||
stp w7, w8, [out, #8] | ||
ret | ||
|
||
.align 4 | ||
.ltorg | ||
.endm | ||
|
||
.align 5 | ||
ENTRY(__aes_arm64_encrypt) | ||
do_crypt fround, crypto_ft_tab, crypto_fl_tab | ||
ENDPROC(__aes_arm64_encrypt) | ||
|
||
.align 5 | ||
ENTRY(__aes_arm64_decrypt) | ||
do_crypt iround, crypto_it_tab, crypto_il_tab | ||
ENDPROC(__aes_arm64_decrypt) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Scalar AES core transform | ||
* | ||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License version 2 as | ||
* published by the Free Software Foundation. | ||
*/ | ||
|
||
#include <crypto/aes.h> | ||
#include <linux/crypto.h> | ||
#include <linux/module.h> | ||
|
||
asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); | ||
EXPORT_SYMBOL(__aes_arm64_encrypt); | ||
|
||
asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); | ||
EXPORT_SYMBOL(__aes_arm64_decrypt); | ||
|
||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
{ | ||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
int rounds = 6 + ctx->key_length / 4; | ||
|
||
__aes_arm64_encrypt(ctx->key_enc, out, in, rounds); | ||
} | ||
|
||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
{ | ||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
int rounds = 6 + ctx->key_length / 4; | ||
|
||
__aes_arm64_decrypt(ctx->key_dec, out, in, rounds); | ||
} | ||
|
||
static struct crypto_alg aes_alg = { | ||
.cra_name = "aes", | ||
.cra_driver_name = "aes-arm64", | ||
.cra_priority = 200, | ||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
.cra_blocksize = AES_BLOCK_SIZE, | ||
.cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
.cra_module = THIS_MODULE, | ||
|
||
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, | ||
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, | ||
.cra_cipher.cia_setkey = crypto_aes_set_key, | ||
.cra_cipher.cia_encrypt = aes_encrypt, | ||
.cra_cipher.cia_decrypt = aes_decrypt | ||
}; | ||
|
||
static int __init aes_init(void) | ||
{ | ||
return crypto_register_alg(&aes_alg); | ||
} | ||
|
||
static void __exit aes_fini(void) | ||
{ | ||
crypto_unregister_alg(&aes_alg); | ||
} | ||
|
||
module_init(aes_init); | ||
module_exit(aes_fini); | ||
|
||
MODULE_DESCRIPTION("Scalar AES cipher for arm64"); | ||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
MODULE_LICENSE("GPL v2"); | ||
MODULE_ALIAS_CRYPTO("aes"); |