Skip to content

Commit

Permalink
crypto: x86/twofish-avx - use optimized XTS code
Browse files Browse the repository at this point in the history
Change twofish-avx to use the new XTS code, for smaller stack usage and small
boost to performance.

tcrypt results, with Intel i5-2450M:
        enc     dec
16B     1.03x   1.02x
64B     0.91x   0.91x
256B    1.10x   1.09x
1024B   1.12x   1.11x
8192B   1.12x   1.11x

Since XTS is practically always used with data blocks of size 512 bytes or
more, I chose to not make use of twofish-3way for block sized smaller than
128 bytes. This causes slower result in tcrypt for 64 bytes.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
  • Loading branch information
Jussi Kivilinna authored and Herbert Xu committed Apr 25, 2013
1 parent a05248e commit 18be452
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 41 deletions.
48 changes: 47 additions & 1 deletion arch/x86/crypto/twofish-avx-x86_64-asm_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -33,6 +33,8 @@

.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

.text

Expand Down Expand Up @@ -408,3 +410,47 @@ ENTRY(twofish_ctr_8way)

ret;
ENDPROC(twofish_ctr_8way)

ENTRY(twofish_xts_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/

movq %rsi, %r11;

/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);

call __twofish_enc_blk8;

/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);

ret;
ENDPROC(twofish_xts_enc_8way)

ENTRY(twofish_xts_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/

movq %rsi, %r11;

/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);

call __twofish_dec_blk8;

/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

ret;
ENDPROC(twofish_xts_dec_8way)
91 changes: 51 additions & 40 deletions arch/x86/crypto/twofish_avx_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
Expand Down Expand Up @@ -56,12 +58,29 @@ asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);

asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);

static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, false);
}

static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_enc_blk));
}

static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_dec_blk));
}


static const struct common_glue_ctx twofish_enc = {
.num_funcs = 3,
Expand Down Expand Up @@ -95,6 +114,19 @@ static const struct common_glue_ctx twofish_ctr = {
} }
};

static const struct common_glue_ctx twofish_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,

.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
} }
};

static const struct common_glue_ctx twofish_dec = {
.num_funcs = 3,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
Expand Down Expand Up @@ -127,6 +159,19 @@ static const struct common_glue_ctx twofish_dec_cbc = {
} }
};

static const struct common_glue_ctx twofish_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,

.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
} }
};

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
Expand Down Expand Up @@ -275,54 +320,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TWOFISH_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),

.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;

desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);

return ret;
return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(twofish_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}

static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TWOFISH_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),

.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;

desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);

return ret;
return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(twofish_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}

static struct crypto_alg twofish_algs[10] = { {
Expand Down

0 comments on commit 18be452

Please sign in to comment.