From b095b2fe2ffba9738ba2c0dd5cc5f06e029c290b Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 20 Dec 2011 12:20:16 +0200 Subject: [PATCH] --- yaml --- r: 292239 b: refs/heads/master c: a522ee85ba979e7897a75b1c97db1b0304b68b5c h: refs/heads/master i: 292237: e93daff1dfdd18415744bdff9c7c698707d71b33 292235: a19e489e2efbe60387aa5519dc664fb629e83177 292231: a4a6af98c0f6b19f7f0a596ae05e59f21894dc35 292223: 8e3795929ea525b2c504cc8d75c1573a7511b16c v: v3 --- [refs] | 2 +- trunk/arch/x86/crypto/twofish_glue_3way.c | 47 +++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/[refs] b/[refs] index fd007e2b0b47..85396e8d5654 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: f1df57d02a0f83e764b4dc9187f58665d70f190e +refs/heads/master: a522ee85ba979e7897a75b1c97db1b0304b68b5c diff --git a/trunk/arch/x86/crypto/twofish_glue_3way.c b/trunk/arch/x86/crypto/twofish_glue_3way.c index 7fee8c152f93..0afd134d8c9c 100644 --- a/trunk/arch/x86/crypto/twofish_glue_3way.c +++ b/trunk/arch/x86/crypto/twofish_glue_3way.c @@ -25,6 +25,7 @@ * */ +#include #include #include #include @@ -637,10 +638,56 @@ static struct crypto_alg blk_xts_alg = { }, }; +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x06 && + (boot_cpu_data.x86_model == 0x1c || + boot_cpu_data.x86_model == 0x26 || + boot_cpu_data.x86_model == 0x36)) { + /* + * On Atom, twofish-3way is slower than original assembler + * implementation. Twofish-3way trades off some performance in + * storing blocks in 64bit registers to allow three blocks to + * be processed in parallel. Parallel operation then allows gaining + * more performance than was traded off, on out-of-order CPUs. + * However Atom does not benefit from this parallelism and + * should be blacklisted. 
+ */ + return true; + } + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, twofish-3way is slower than original assembler + * implementation because of excessive use of 64bit rotates and + * left-shifts (which are really slow on P4) needed to store and + * handle a 128bit block in two 64bit registers. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + int __init init(void) { int err; + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "twofish-x86_64-3way: performance on this CPU " + "would be suboptimal: disabling " + "twofish-x86_64-3way.\n"); + return -ENODEV; + } + err = crypto_register_alg(&blk_ecb_alg); if (err) goto ecb_err;