crypto: arm64/ghash-ce - replace NEON yield check with block limit
Checking the TIF_NEED_RESCHED flag is disproportionately costly on cores
with fast crypto instructions and comparatively slow memory accesses.

For an algorithm such as GHASH, which executes at ~1 cycle per byte on
cores that implement 64-bit polynomial multiplication, there is really
no need to check TIF_NEED_RESCHED particularly often, so we can remove
the NEON yield check from the assembler routines.

However, unlike the AEAD or skcipher APIs, the shash/ahash APIs take
arbitrary input lengths, and so there needs to be some sanity check
to ensure that we don't hog the CPU for excessive amounts of time.

So let's simply cap the maximum input size that is processed in one go
to 64 KB.
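
(For scale, using the ~1 cycle per byte figure above: a full 64 KB chunk is
65,536 bytes, i.e. on the order of 65,536 cycles, or tens of microseconds at
GHz clock rates between reschedule opportunities.)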

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Ard Biesheuvel authored and Herbert Xu committed Aug 7, 2018
1 parent 8418cf5 commit 8e492ef
Showing 2 changed files with 23 additions and 32 deletions.
arch/arm64/crypto/ghash-ce-core.S (39 changes: 11 additions & 28 deletions)
@@ -213,31 +213,23 @@
     .endm

     .macro      __pmull_ghash, pn
-    frame_push  5
-
-    mov         x19, x0
-    mov         x20, x1
-    mov         x21, x2
-    mov         x22, x3
-    mov         x23, x4
-
-0:  ld1         {SHASH.2d}, [x22]
-    ld1         {XL.2d}, [x20]
+    ld1         {SHASH.2d}, [x3]
+    ld1         {XL.2d}, [x1]
     ext         SHASH2.16b, SHASH.16b, SHASH.16b, #8
     eor         SHASH2.16b, SHASH2.16b, SHASH.16b

     __pmull_pre_\pn

     /* do the head block first, if supplied */
-    cbz         x23, 1f
-    ld1         {T1.2d}, [x23]
-    mov         x23, xzr
-    b           2f
+    cbz         x4, 0f
+    ld1         {T1.2d}, [x4]
+    mov         x4, xzr
+    b           1f

-1:  ld1         {T1.2d}, [x21], #16
-    sub         w19, w19, #1
+0:  ld1         {T1.2d}, [x2], #16
+    sub         w0, w0, #1

-2:  /* multiply XL by SHASH in GF(2^128) */
+1:  /* multiply XL by SHASH in GF(2^128) */
 CPU_LE( rev64       T1.16b, T1.16b  )

     ext         T2.16b, XL.16b, XL.16b, #8
@@ -259,18 +251,9 @@ CPU_LE( rev64 T1.16b, T1.16b )
     eor         T2.16b, T2.16b, XH.16b
     eor         XL.16b, XL.16b, T2.16b

-    cbz         w19, 3f
-
-    if_will_cond_yield_neon
-    st1         {XL.2d}, [x20]
-    do_cond_yield_neon
-    b           0b
-    endif_yield_neon
-
-    b           1b
+    cbnz        w0, 0b

-3:  st1         {XL.2d}, [x20]
-    frame_pop
+    st1         {XL.2d}, [x1]
     ret
     .endm
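
Dropping the yield is also what allows the register renumbering above: with no
call into the cond_yield helpers left inside the loop, the macro can work
directly in the AAPCS64 argument registers x0-x4 instead of stashing them in
callee-saved registers x19-x23 behind frame_push/frame_pop. A sketch of the
C-side declaration of this macro's instantiations, mirroring the prototype in
ghash-ce-glue.c (the p8 variant is declared identically; shown for
illustration):

    /* AAPCS64 mapping: x0 = blocks, x1 = dg, x2 = src, x3 = k, x4 = head */
    asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[],
                                           const char *src,
                                           struct ghash_key const *k,
                                           const char *head);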

arch/arm64/crypto/ghash-ce-glue.c (16 changes: 12 additions & 4 deletions)
@@ -113,6 +113,9 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
     }
 }

+/* avoid hogging the CPU for too long */
+#define MAX_BLOCKS  (SZ_64K / GHASH_BLOCK_SIZE)
+
 static int ghash_update(struct shash_desc *desc, const u8 *src,
                         unsigned int len)
 {
@@ -136,11 +139,16 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
         blocks = len / GHASH_BLOCK_SIZE;
         len %= GHASH_BLOCK_SIZE;

-        ghash_do_update(blocks, ctx->digest, src, key,
-                        partial ? ctx->buf : NULL);
+        do {
+            int chunk = min(blocks, MAX_BLOCKS);
+
+            ghash_do_update(chunk, ctx->digest, src, key,
+                            partial ? ctx->buf : NULL);

-        src += blocks * GHASH_BLOCK_SIZE;
-        partial = 0;
+            blocks -= chunk;
+            src += chunk * GHASH_BLOCK_SIZE;
+            partial = 0;
+        } while (unlikely(blocks > 0));
     }
     if (len)
         memcpy(ctx->buf + partial, src, len);
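
Since GHASH_BLOCK_SIZE is 16 bytes, MAX_BLOCKS works out to 65536 / 16 = 4096
blocks per call into the NEON routine. The cap-and-loop pattern itself is
generic; here is a minimal standalone sketch in plain C (hypothetical names,
no kernel dependencies) of the same structure:

    #include <stdio.h>

    #define BLOCK_SIZE  16                    /* GHASH_BLOCK_SIZE */
    #define MAX_BLOCKS  (65536 / BLOCK_SIZE)  /* SZ_64K / GHASH_BLOCK_SIZE = 4096 */

    /* stand-in for ghash_do_update(): just report the chunk size */
    static void do_update(int blocks)
    {
        printf("processing %d blocks\n", blocks);
    }

    static void update(const unsigned char *src, int blocks)
    {
        /* never hand the (hypothetical) NEON routine more than 64 KB at once */
        do {
            int chunk = blocks < MAX_BLOCKS ? blocks : MAX_BLOCKS;

            do_update(chunk);

            blocks -= chunk;
            src += chunk * BLOCK_SIZE;
        } while (blocks > 0);
    }

    int main(void)
    {
        static unsigned char buf[10000 * BLOCK_SIZE];  /* 10000 blocks > MAX_BLOCKS */

        update(buf, 10000);  /* prints chunks of 4096, 4096 and 1808 */
        return 0;
    }

A 10000-block (~156 KB) update is thus split into 4096 + 4096 + 1808 blocks,
which is exactly how the kernel loop above would subdivide it.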
