From 1a445e8efaa4334457c4d1f48a5d1d829b503f0c Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 13 May 2016 14:02:00 +0200 Subject: [PATCH 001/180] crypto: sha-ssse3 - add MODULE_ALIAS Add the MODULE_ALIAS for the cra_driver_name of the different ciphers to allow an automated loading if a driver name is used. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_glue.c | 6 ++++++ arch/x86/crypto/sha256_ssse3_glue.c | 10 ++++++++++ arch/x86/crypto/sha512_ssse3_glue.c | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 1024e378a358f..fc61739150e7c 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -374,3 +374,9 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-ssse3"); +MODULE_ALIAS_CRYPTO("sha1-avx"); +MODULE_ALIAS_CRYPTO("sha1-avx2"); +#ifdef CONFIG_AS_SHA1_NI +MODULE_ALIAS_CRYPTO("sha1-ni"); +#endif diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 3ae0f43ebd376..9e79baf03a4b2 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -427,4 +427,14 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-ssse3"); +MODULE_ALIAS_CRYPTO("sha256-avx"); +MODULE_ALIAS_CRYPTO("sha256-avx2"); MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-ssse3"); +MODULE_ALIAS_CRYPTO("sha224-avx"); +MODULE_ALIAS_CRYPTO("sha224-avx2"); +#ifdef CONFIG_AS_SHA256_NI +MODULE_ALIAS_CRYPTO("sha256-ni"); +MODULE_ALIAS_CRYPTO("sha224-ni"); +#endif diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b17c83d027dd..2b0e2a6825f33 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -346,4 +346,10 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha512-ssse3"); +MODULE_ALIAS_CRYPTO("sha512-avx"); +MODULE_ALIAS_CRYPTO("sha512-avx2"); MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha384-ssse3"); +MODULE_ALIAS_CRYPTO("sha384-avx"); +MODULE_ALIAS_CRYPTO("sha384-avx2"); From eed1e1afd8d542d9644534c1b712599b5d680007 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Mon, 16 May 2016 02:53:36 +0200 Subject: [PATCH 002/180] crypto: user - no parsing of CRYPTO_MSG_GETALG The CRYPTO_MSG_GETALG netlink message type provides a buffer to the kernel to retrieve information from the kernel. The data buffer will not provide any input and will not be read. Hence the nlmsg_parse is not applicable to this netlink message type. This patch fixes the following kernel log message when using this netlink interface: netlink: 208 bytes leftover after parsing attributes in process `XXX'. Patch successfully tested with libkcapi from [1] which uses CRYPTO_MSG_GETALG to obtain cipher-specific information from the kernel. [1] http://www.chronox.de/libkcapi.html Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/crypto_user.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d577..f71960dea8822 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -516,10 +516,12 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, - crypto_policy); - if (err < 0) - return err; + if (type != (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE)) { + err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, + CRYPTOCFGA_MAX, crypto_policy); + if (err < 0) + return err; + } if (link->doit == NULL) return -EINVAL; From 21a3d3b234cd170deded6beb42409f2a9cd01917 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 17 May 2016 10:53:51 -0700 Subject: [PATCH 003/180] crypto: qat - fix typos sizeof for ctx The sizeof(*ctx->dec_cd) and sizeof(*ctx->enc_cd) are equal, but we should use the correct one for freeing memory anyway. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1e8852a8a0574..769148dbaeb34 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -947,13 +947,13 @@ static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm, return 0; out_free_all: - memset(ctx->dec_cd, 0, sizeof(*ctx->enc_cd)); - dma_free_coherent(dev, sizeof(*ctx->enc_cd), + memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd)); + dma_free_coherent(dev, sizeof(*ctx->dec_cd), ctx->dec_cd, ctx->dec_cd_paddr); ctx->dec_cd = NULL; out_free_enc: - memset(ctx->enc_cd, 0, sizeof(*ctx->dec_cd)); - dma_free_coherent(dev, sizeof(*ctx->dec_cd), + memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd)); + dma_free_coherent(dev, sizeof(*ctx->enc_cd), ctx->enc_cd, ctx->enc_cd_paddr); ctx->enc_cd = NULL; return -ENOMEM; From 7a1aedba7061948eee4d188740e24ffb20ff7ab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:10:43 +0300 Subject: [PATCH 004/180] asm-generic/io.h: allow barriers in io{read,write}{16,32}be MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While reviewing the addition of io{read,write}64be accessors, Arnd -finds a potential problem: "If an architecture overrides readq/writeq to have barriers but does not override ioread64be/iowrite64be, this will lack the barriers and behave differently from the little-endian version. I think the only affected architecture is ARC, since ARM and ARM64 both override the big-endian accessors to have the correct barriers, and all others don't use barriers at all." -suggests a fix for the same problem in existing code (16/32-bit accessors); the fix leads "to a double-swap on architectures that don't override the io{read,write}{16,32}be accessors, but it will work correctly on all architectures without them having to override these accessors." Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- include/asm-generic/io.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 002b81f6f2bc2..325c8908f6de3 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -613,7 +613,7 @@ static inline void iowrite32(u32 value, volatile void __iomem *addr) #define ioread16be ioread16be static inline u16 ioread16be(const volatile void __iomem *addr) { - return __be16_to_cpu(__raw_readw(addr)); + return swab16(readw(addr)); } #endif @@ -621,7 +621,7 @@ static inline u16 ioread16be(const volatile void __iomem *addr) #define ioread32be ioread32be static inline u32 ioread32be(const volatile void __iomem *addr) { - return __be32_to_cpu(__raw_readl(addr)); + return swab32(readl(addr)); } #endif @@ -629,7 +629,7 @@ static inline u32 ioread32be(const volatile void __iomem *addr) #define iowrite16be iowrite16be static inline void iowrite16be(u16 value, void volatile __iomem *addr) { - __raw_writew(__cpu_to_be16(value), addr); + writew(swab16(value), addr); } #endif @@ -637,7 +637,7 @@ static inline void iowrite16be(u16 value, void volatile __iomem *addr) #define iowrite32be iowrite32be static inline void iowrite32be(u32 value, volatile void __iomem *addr) { - __raw_writel(__cpu_to_be32(value), addr); + writel(swab32(value), addr); } #endif From 9e44fb1816dba8f283809a25c29ff7969a3acb99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:10:56 +0300 Subject: [PATCH 005/180] asm-generic/io.h: add io{read,write}64 accessors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow device drivers to consistently use io{read,write}XX also for 64-bit accesses. Acked-by: Arnd Bergmann Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- include/asm-generic/io.h | 63 +++++++++++++++++++++++++++++++++++++ include/asm-generic/iomap.h | 8 +++++ 2 files changed, 71 insertions(+) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 325c8908f6de3..7ef015eb3403f 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -585,6 +585,16 @@ static inline u32 ioread32(const volatile void __iomem *addr) } #endif +#ifdef CONFIG_64BIT +#ifndef ioread64 +#define ioread64 ioread64 +static inline u64 ioread64(const volatile void __iomem *addr) +{ + return readq(addr); +} +#endif +#endif /* CONFIG_64BIT */ + #ifndef iowrite8 #define iowrite8 iowrite8 static inline void iowrite8(u8 value, volatile void __iomem *addr) @@ -609,6 +619,16 @@ static inline void iowrite32(u32 value, volatile void __iomem *addr) } #endif +#ifdef CONFIG_64BIT +#ifndef iowrite64 +#define iowrite64 iowrite64 +static inline void iowrite64(u64 value, volatile void __iomem *addr) +{ + writeq(value, addr); +} +#endif +#endif /* CONFIG_64BIT */ + #ifndef ioread16be #define ioread16be ioread16be static inline u16 ioread16be(const volatile void __iomem *addr) @@ -625,6 +645,16 @@ static inline u32 ioread32be(const volatile void __iomem *addr) } #endif +#ifdef CONFIG_64BIT +#ifndef ioread64be +#define ioread64be ioread64be +static inline u64 ioread64be(const volatile void __iomem *addr) +{ + return swab64(readq(addr)); +} +#endif +#endif /* CONFIG_64BIT */ + #ifndef iowrite16be #define iowrite16be iowrite16be static inline void iowrite16be(u16 value, void volatile __iomem *addr) @@ -641,6 +671,16 @@ static inline void iowrite32be(u32 value, volatile void __iomem *addr) } #endif +#ifdef CONFIG_64BIT +#ifndef iowrite64be +#define iowrite64be iowrite64be +static inline void iowrite64be(u64 value, volatile void __iomem *addr) +{ + writeq(swab64(value), addr); +} +#endif +#endif /* CONFIG_64BIT */ + #ifndef ioread8_rep #define ioread8_rep ioread8_rep static inline void ioread8_rep(const volatile void __iomem *addr, void *buffer, @@ -668,6 +708,17 @@ static inline void ioread32_rep(const volatile void __iomem *addr, } #endif +#ifdef CONFIG_64BIT +#ifndef ioread64_rep +#define ioread64_rep ioread64_rep +static inline void ioread64_rep(const volatile void __iomem *addr, + void *buffer, unsigned int count) +{ + readsq(addr, buffer, count); +} +#endif +#endif /* CONFIG_64BIT */ + #ifndef iowrite8_rep #define iowrite8_rep iowrite8_rep static inline void iowrite8_rep(volatile void __iomem *addr, @@ -697,6 +748,18 @@ static inline void iowrite32_rep(volatile void __iomem *addr, writesl(addr, buffer, count); } #endif + +#ifdef CONFIG_64BIT +#ifndef iowrite64_rep +#define iowrite64_rep iowrite64_rep +static inline void iowrite64_rep(volatile void __iomem *addr, + const void *buffer, + unsigned int count) +{ + writesq(addr, buffer, count); +} +#endif +#endif /* CONFIG_64BIT */ #endif /* CONFIG_GENERIC_IOMAP */ #ifdef __KERNEL__ diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index d8f8622fa044d..650fede33c253 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -30,12 +30,20 @@ extern unsigned int ioread16(void __iomem *); extern unsigned int ioread16be(void __iomem *); extern unsigned int ioread32(void __iomem *); extern unsigned int ioread32be(void __iomem *); +#ifdef CONFIG_64BIT +extern u64 ioread64(void __iomem *); +extern u64 ioread64be(void __iomem *); +#endif extern void iowrite8(u8, void __iomem *); extern void iowrite16(u16, void __iomem *); extern void iowrite16be(u16, void __iomem *); extern void iowrite32(u32, void __iomem *); extern void iowrite32be(u32, void __iomem *); +#ifdef CONFIG_64BIT +extern void iowrite64(u64, void __iomem *); +extern void iowrite64be(u64, void __iomem *); +#endif /* * "string" versions of the above. Note that they From 2a41bfbc0393e6f2db419b80a7000708d90c2eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:04 +0300 Subject: [PATCH 006/180] arm64: add io{read,write}64be accessors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow device drivers to consistently use io{read,write}XXbe also for 64-bit accesses. Acked-by: Catalin Marinas Signed-off-by: Alex Porosanu Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- arch/arm64/include/asm/io.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 44be1e03ed656..9b6e408cfa51f 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -174,13 +174,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define iounmap __iounmap /* - * io{read,write}{16,32}be() macros + * io{read,write}{16,32,64}be() macros */ #define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) #define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) +#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; }) #define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); }) #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) +#define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); }) /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem From d54fc90cc9723d73eef25d09efcef412a6c89d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:10 +0300 Subject: [PATCH 007/180] powerpc: add io{read,write}64 accessors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow device drivers to consistently use io{read,write}XX also for 64-bit accesses. Acked-by: Michael Ellerman Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- arch/powerpc/kernel/iomap.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 12e48d56f771e..3963f0b68d523 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -38,6 +38,18 @@ EXPORT_SYMBOL(ioread16); EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifdef __powerpc64__ +u64 ioread64(void __iomem *addr) +{ + return readq(addr); +} +u64 ioread64be(void __iomem *addr) +{ + return readq_be(addr); +} +EXPORT_SYMBOL(ioread64); +EXPORT_SYMBOL(ioread64be); +#endif /* __powerpc64__ */ void iowrite8(u8 val, void __iomem *addr) { @@ -64,6 +76,18 @@ EXPORT_SYMBOL(iowrite16); EXPORT_SYMBOL(iowrite16be); EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); +#ifdef __powerpc64__ +void iowrite64(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +void iowrite64be(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +EXPORT_SYMBOL(iowrite64); +EXPORT_SYMBOL(iowrite64be); +#endif /* __powerpc64__ */ /* * These are the "repeat read/write" functions. Note the From bd52f1c23255a7c355268215c3c75aabbe11a67a Mon Sep 17 00:00:00 2001 From: Cristian Stoica Date: Thu, 19 May 2016 18:11:18 +0300 Subject: [PATCH 008/180] crypto: caam - fix offset field in hw sg entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset field is 13 bits wide; make sure we don't overwrite more than that in the caam hardware scatter gather structure. Signed-off-by: Cristian Stoica Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/desc.h | 2 +- drivers/crypto/caam/sg_sw_sec4.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 1e93c6af2275b..fe30ff69088ce 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -20,7 +20,7 @@ #define SEC4_SG_BPID_MASK 0x000000ff #define SEC4_SG_BPID_SHIFT 16 #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ -#define SEC4_SG_OFFS_MASK 0x00001fff +#define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 12ec6616e89d1..2311341b73569 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -11,12 +11,12 @@ struct sec4_sg_entry; * convert single dma address to h/w link table format */ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, - dma_addr_t dma, u32 len, u32 offset) + dma_addr_t dma, u32 len, u16 offset) { sec4_sg_ptr->ptr = dma; sec4_sg_ptr->len = len; sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset; + sec4_sg_ptr->offset = offset & SEC4_SG_OFFSET_MASK; #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -30,7 +30,7 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, */ static inline struct sec4_sg_entry * sg_to_sec4_sg(struct scatterlist *sg, int sg_count, - struct sec4_sg_entry *sec4_sg_ptr, u32 offset) + struct sec4_sg_entry *sec4_sg_ptr, u16 offset) { while (sg_count) { dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), @@ -48,7 +48,7 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, */ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, struct sec4_sg_entry *sec4_sg_ptr, - u32 offset) + u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; From 261ea058f016bc04fa064348ad9bf39d94379381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:26 +0300 Subject: [PATCH 009/180] crypto: caam - handle core endianness != caam endianness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are SoCs like LS1043A where CAAM endianness (BE) does not match the default endianness of the core (LE). Moreover, there are requirements for the driver to handle cases like CPU_BIG_ENDIAN=y on ARM-based SoCs. This requires for a complete rewrite of the I/O accessors. PPC-specific accessors - {in,out}_{le,be}XX - are replaced with generic ones - io{read,write}[be]XX. Endianness is detected dynamically (at runtime) to allow for multiplatform kernels, for e.g. running the same kernel image on LS1043A (BE CAAM) and LS2080A (LE CAAM) armv8-based SoCs. While here: debugfs entries need to take into consideration the endianness of the core when displaying data. Add the necessary glue code so the entries remain the same, but they are properly read, regardless of the core and/or SEC endianness. Note: pdb.h fixes only what is currently being used (IPsec). Reviewed-by: Tudor Ambarus Signed-off-by: Horia Geantă Signed-off-by: Alex Porosanu Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 4 - drivers/crypto/caam/caamhash.c | 5 +- drivers/crypto/caam/ctrl.c | 125 ++++++++++++++++--------- drivers/crypto/caam/desc.h | 7 +- drivers/crypto/caam/desc_constr.h | 44 ++++++--- drivers/crypto/caam/jr.c | 22 ++--- drivers/crypto/caam/pdb.h | 137 +++++++++++++++++++++------ drivers/crypto/caam/regs.h | 151 ++++++++++++++++++++---------- drivers/crypto/caam/sg_sw_sec4.h | 11 ++- 9 files changed, 340 insertions(+), 166 deletions(-) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 5652a53415dc2..d2c2909a40203 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -116,10 +116,6 @@ config CRYPTO_DEV_FSL_CAAM_IMX def_bool SOC_IMX6 || SOC_IMX7D depends on CRYPTO_DEV_FSL_CAAM -config CRYPTO_DEV_FSL_CAAM_LE - def_bool CRYPTO_DEV_FSL_CAAM_IMX || SOC_LS1021A - depends on CRYPTO_DEV_FSL_CAAM - config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5845d4a087972..f1ecc8df8d41e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -847,7 +847,7 @@ static int ahash_update_ctx(struct ahash_request *req) *next_buflen, 0); } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - SEC4_SG_LEN_FIN; + cpu_to_caam32(SEC4_SG_LEN_FIN); } state->current_buf = !state->current_buf; @@ -949,7 +949,8 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= + cpu_to_caam32(SEC4_SG_LEN_FIN); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 5ad5f3009ae01..0ec112ee52043 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -15,6 +15,9 @@ #include "desc_constr.h" #include "error.h" +bool caam_little_end; +EXPORT_SYMBOL(caam_little_end); + /* * i.MX targets tend to have clock control subsystems that can * enable/disable clocking to our device. @@ -106,7 +109,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (ctrlpriv->virt_en == 1) { - setbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0); while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && --timeout) @@ -115,7 +118,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, timeout = 100000; } - setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, 0, DECORR_RQD0ENABLE); while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && --timeout) @@ -123,12 +126,12 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (!timeout) { dev_err(ctrldev, "failed to acquire DECO 0\n"); - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); return -ENODEV; } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&deco->descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i))); flags = DECO_JQCR_WHL; /* @@ -139,7 +142,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, flags |= DECO_JQCR_FOUR; /* Instruct the DECO to execute it */ - setbits32(&deco->jr_ctl_hi, flags); + clrsetbits_32(&deco->jr_ctl_hi, 0, flags); timeout = 10000000; do { @@ -158,10 +161,10 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, DECO_OP_STATUS_HI_ERR_MASK; if (ctrlpriv->virt_en == 1) - clrbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, DECORSR_JR0, 0); /* Mark the DECO as free */ - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); if (!timeout) return -EAGAIN; @@ -349,7 +352,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) r4tst = &ctrl->r4tst[0]; /* put RNG4 into program mode */ - setbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); /* * Performance-wise, it does not make sense to @@ -363,7 +366,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) >> RTSDCTL_ENT_DLY_SHIFT; if (ent_delay <= val) { /* put RNG4 into run mode */ - clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); return; } @@ -381,9 +384,9 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) * select raw sampling in both entropy shifter * and statistical checker */ - setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); /* put RNG4 into run mode */ - clrbits32(&val, RTMCTL_PRGM); + clrsetbits_32(&val, RTMCTL_PRGM, 0); /* write back the control register */ wr_reg32(&r4tst->rtmctl, val); } @@ -406,6 +409,23 @@ int caam_get_era(void) } EXPORT_SYMBOL(caam_get_era); +#ifdef CONFIG_DEBUG_FS +static int caam_debugfs_u64_get(void *data, u64 *val) +{ + *val = caam64_to_cpu(*(u64 *)data); + return 0; +} + +static int caam_debugfs_u32_get(void *data, u64 *val) +{ + *val = caam32_to_cpu(*(u32 *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -504,6 +524,10 @@ static int caam_probe(struct platform_device *pdev) ret = -ENOMEM; goto disable_caam_emi_slow; } + + caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) & + (CSTA_PLEND | CSTA_ALT_PLEND)); + /* Finding the page size for using the CTPR_MS register */ comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; @@ -559,9 +583,9 @@ static int caam_probe(struct platform_device *pdev) } if (ctrlpriv->virt_en == 1) - setbits32(&ctrl->jrstart, JRSTART_JR0_START | - JRSTART_JR1_START | JRSTART_JR2_START | - JRSTART_JR3_START); + clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START | + JRSTART_JR1_START | JRSTART_JR2_START | + JRSTART_JR3_START); if (sizeof(dma_addr_t) == sizeof(u64)) if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) @@ -693,7 +717,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; /* Enable RDB bit so that RNG works faster */ - setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); + clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); } /* NOTE: RTIC detection ought to go here, around Si time */ @@ -719,48 +743,59 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root); /* Controller-level - performance monitor counters */ + ctrlpriv->ctl_rq_dequeued = - debugfs_create_u64("rq_dequeued", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->req_dequeued); + debugfs_create_file("rq_dequeued", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->req_dequeued, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_req = - debugfs_create_u64("ob_rq_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_req); + debugfs_create_file("ob_rq_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_req = - debugfs_create_u64("ib_rq_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_req); + debugfs_create_file("ib_rq_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_bytes = - debugfs_create_u64("ob_bytes_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_bytes); + debugfs_create_file("ob_bytes_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_prot_bytes = - debugfs_create_u64("ob_bytes_protected", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_prot_bytes); + debugfs_create_file("ob_bytes_protected", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_prot_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_bytes = - debugfs_create_u64("ib_bytes_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_bytes); + debugfs_create_file("ib_bytes_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_valid_bytes = - debugfs_create_u64("ib_bytes_validated", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_valid_bytes); + debugfs_create_file("ib_bytes_validated", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_valid_bytes, + &caam_fops_u64_ro); /* Controller level - global status values */ ctrlpriv->ctl_faultaddr = - debugfs_create_u64("fault_addr", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultaddr); + debugfs_create_file("fault_addr", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultaddr, + &caam_fops_u32_ro); ctrlpriv->ctl_faultdetail = - debugfs_create_u32("fault_detail", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultdetail); + debugfs_create_file("fault_detail", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultdetail, + &caam_fops_u32_ro); ctrlpriv->ctl_faultstatus = - debugfs_create_u32("fault_status", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->status); + debugfs_create_file("fault_status", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->status, + &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index fe30ff69088ce..d8d5584b600b4 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -23,16 +23,15 @@ #define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX +#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \ + defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) u32 rsvd1; dma_addr_t ptr; #else u64 ptr; #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */ u32 len; - u8 rsvd2; - u8 buf_pool_id; - u16 offset; + u32 bpid_offset; }; /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 98d07de24fc48..ae3aef6e9feeb 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -5,6 +5,7 @@ */ #include "desc.h" +#include "regs.h" #define IMMEDIATE (1 << 23) #define CAAM_CMD_SZ sizeof(u32) @@ -30,9 +31,11 @@ LDST_SRCDST_WORD_DECOCTRL | \ (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT)) +extern bool caam_little_end; + static inline int desc_len(u32 *desc) { - return *desc & HDR_DESCLEN_MASK; + return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } static inline int desc_bytes(void *desc) @@ -52,7 +55,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = (options | HDR_ONE) + 1; + *desc = cpu_to_caam32((options | HDR_ONE) + 1); } static inline void init_sh_desc(u32 *desc, u32 options) @@ -78,9 +81,10 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); - *offset = ptr; + *offset = cpu_to_caam_dma(ptr); - (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + CAAM_PTR_SZ / CAAM_CMD_SZ); } static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, @@ -99,16 +103,17 @@ static inline void append_data(u32 *desc, void *data, int len) if (len) /* avoid sparse warning: memcpy with byte count of 0 */ memcpy(offset, data, len); - (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } static inline void append_cmd(u32 *desc, u32 command) { u32 *cmd = desc_end(desc); - *cmd = command; + *cmd = cpu_to_caam32(command); - (*desc)++; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1); } #define append_u32 append_cmd @@ -117,16 +122,22 @@ static inline void append_u64(u32 *desc, u64 data) { u32 *offset = desc_end(desc); - *offset = upper_32_bits(data); - *(++offset) = lower_32_bits(data); + /* Only 32-bit alignment is guaranteed in descriptor buffer */ + if (caam_little_end) { + *offset = cpu_to_caam32(lower_32_bits(data)); + *(++offset) = cpu_to_caam32(upper_32_bits(data)); + } else { + *offset = cpu_to_caam32(upper_32_bits(data)); + *(++offset) = cpu_to_caam32(lower_32_bits(data)); + } - (*desc) += 2; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2); } /* Write command without affecting header, and return pointer to next word */ static inline u32 *write_cmd(u32 *desc, u32 command) { - *desc = command; + *desc = cpu_to_caam32(command); return desc + 1; } @@ -168,14 +179,17 @@ APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { - *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | + (desc_len(desc) - (jump_cmd - desc))); } static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) { - *move_cmd &= ~MOVE_OFFSET_MASK; - *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & - MOVE_OFFSET_MASK); + u32 val = caam32_to_cpu(*move_cmd); + + val &= ~MOVE_OFFSET_MASK; + val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK; + *move_cmd = cpu_to_caam32(val); } #define APPEND_CMD(cmd, op) \ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 5ef4be22eb809..a81f551ac222a 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -31,7 +31,7 @@ static int caam_reset_hw_jr(struct device *dev) * mask interrupts since we are going to poll * for reset completion status */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* initiate flush (required prior to reset) */ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); @@ -57,7 +57,7 @@ static int caam_reset_hw_jr(struct device *dev) } /* unmask interrupts */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); return 0; } @@ -147,7 +147,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) } /* mask valid interrupts */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); @@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg) sw_idx = (tail + i) & (JOBR_DEPTH - 1); if (jrp->outring[hw_idx].desc == - jrp->entinfo[sw_idx].desc_addr_dma) + caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma)) break; /* found */ } /* we should never fail to find a matching descriptor */ @@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg) usercall = jrp->entinfo[sw_idx].callbk; userarg = jrp->entinfo[sw_idx].cbkarg; userdesc = jrp->entinfo[sw_idx].desc_addr_virt; - userstatus = jrp->outring[hw_idx].jrstatus; + userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus); /* * Make sure all information from the job has been obtained @@ -236,7 +236,7 @@ static void caam_jr_dequeue(unsigned long devarg) } /* reenable / unmask IRQs */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); } /** @@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, int head, tail, desc_size; dma_addr_t desc_dma; - desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32); desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, desc_dma)) { dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n"); @@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, head_entry->cbkarg = areq; head_entry->desc_addr_dma = desc_dma; - jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + jrp->inpring[jrp->inp_ring_write_index] = cpu_to_caam_dma(desc_dma); /* * Guarantee that the descriptor's DMA address has been written to @@ -444,9 +444,9 @@ static int caam_jr_init(struct device *dev) spin_lock_init(&jrp->outlock); /* Select interrupt coalescing parameters */ - setbits32(&jrp->rregs->rconfig_lo, JOBR_INTC | - (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | - (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC | + (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | + (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); return 0; diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 3a87c0cf879ac..d383573d3dd45 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -11,8 +11,8 @@ /* * PDB- IPSec ESP Header Modification Options */ -#define PDBHMO_ESP_DECAP_SHIFT 12 -#define PDBHMO_ESP_ENCAP_SHIFT 4 +#define PDBHMO_ESP_DECAP_SHIFT 28 +#define PDBHMO_ESP_ENCAP_SHIFT 28 /* * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the * Options Byte IP version (IPvsn) field: @@ -32,12 +32,23 @@ */ #define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT) +#define PDBNH_ESP_ENCAP_SHIFT 16 +#define PDBNH_ESP_ENCAP_MASK (0xff << PDBNH_ESP_ENCAP_SHIFT) + +#define PDBHDRLEN_ESP_DECAP_SHIFT 16 +#define PDBHDRLEN_MASK (0x0fff << PDBHDRLEN_ESP_DECAP_SHIFT) + +#define PDB_NH_OFFSET_SHIFT 8 +#define PDB_NH_OFFSET_MASK (0xff << PDB_NH_OFFSET_SHIFT) + /* * PDB - IPSec ESP Encap/Decap Options */ #define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */ #define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */ +#define PDBOPTS_ESP_ARS128 0x80 /* 128-entry antireplay window */ #define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */ +#define PDBOPTS_ESP_ARS_MASK 0xc0 /* antireplay window mask */ #define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */ #define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */ #define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */ @@ -54,35 +65,73 @@ /* * General IPSec encap/decap PDB definitions */ + +/** + * ipsec_encap_cbc - PDB part for IPsec CBC encapsulation + * @iv: 16-byte array initialization vector + */ struct ipsec_encap_cbc { - u32 iv[4]; + u8 iv[16]; }; +/** + * ipsec_encap_ctr - PDB part for IPsec CTR encapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ctr { - u32 ctr_nonce; + u8 ctr_nonce[4]; u32 ctr_initial; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_ccm - PDB part for IPsec CCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ccm { - u32 salt; /* lower 24 bits */ - u8 b0_flags; - u8 ctr_flags; - u16 ctr_initial; - u32 iv[2]; + u8 salt[4]; + u32 ccm_opt; + u64 iv; }; +/** + * ipsec_encap_gcm - PDB part for IPsec GCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @rsvd: reserved, do not use + * @iv: initialization vector + */ struct ipsec_encap_gcm { - u32 salt; /* lower 24 bits */ + u8 salt[4]; u32 rsvd1; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_pdb - PDB for IPsec encapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * reserved - 4b + * next header - 8b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @spi: IPsec SPI (Security Parameters Index) + * @ip_hdr_len: optional IP Header length (in bytes) + * reserved - 16b + * Opt. IP Hdr Len - 16b + * @ip_hdr: optional IP Header content + */ struct ipsec_encap_pdb { - u8 hmo_rsvd; - u8 ip_nh; - u8 ip_nh_offset; - u8 options; + u32 options; u32 seq_num_ext_hi; u32 seq_num; union { @@ -92,36 +141,65 @@ struct ipsec_encap_pdb { struct ipsec_encap_gcm gcm; }; u32 spi; - u16 rsvd1; - u16 ip_hdr_len; - u32 ip_hdr[0]; /* optional IP Header content */ + u32 ip_hdr_len; + u32 ip_hdr[0]; }; +/** + * ipsec_decap_cbc - PDB part for IPsec CBC decapsulation + * @rsvd: reserved, do not use + */ struct ipsec_decap_cbc { u32 rsvd[2]; }; +/** + * ipsec_decap_ctr - PDB part for IPsec CTR decapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + */ struct ipsec_decap_ctr { - u32 salt; + u8 ctr_nonce[4]; u32 ctr_initial; }; +/** + * ipsec_decap_ccm - PDB part for IPsec CCM decapsulation + * @salt: 3-byte salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + */ struct ipsec_decap_ccm { - u32 salt; - u8 iv_flags; - u8 ctr_flags; - u16 ctr_initial; + u8 salt[4]; + u32 ccm_opt; }; +/** + * ipsec_decap_gcm - PDB part for IPsec GCN decapsulation + * @salt: 4-byte salt + * @rsvd: reserved, do not use + */ struct ipsec_decap_gcm { - u32 salt; + u8 salt[4]; u32 resvd; }; +/** + * ipsec_decap_pdb - PDB for IPsec decapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * IP header length - 12b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @anti_replay: Anti-replay window; size depends on ARS (option flags) + */ struct ipsec_decap_pdb { - u16 hmo_ip_hdr_len; - u8 ip_nh_offset; - u8 options; + u32 options; union { struct ipsec_decap_cbc cbc; struct ipsec_decap_ctr ctr; @@ -130,8 +208,7 @@ struct ipsec_decap_pdb { }; u32 seq_num_ext_hi; u32 seq_num; - u32 anti_replay[2]; - u32 end_index[0]; + __be32 anti_replay[4]; }; /* diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 0ba9c40597dcb..8c766cf9202c1 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -8,6 +8,7 @@ #define REGS_H #include +#include #include /* @@ -65,46 +66,56 @@ * */ -#ifdef CONFIG_ARM -/* These are common macros for Power, put here for ARM */ -#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) -#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) +extern bool caam_little_end; -#define out_arch(type, endian, a, v) __raw_write##type(cpu_to_##endian(v), a) -#define in_arch(type, endian, a) endian##_to_cpu(__raw_read##type(a)) +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu(val); \ + else \ + return be##len ## _to_cpu(val); \ +} -#define out_le32(a, v) out_arch(l, le32, a, v) -#define in_le32(a) in_arch(l, le32, a) +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return cpu_to_le##len(val); \ + else \ + return cpu_to_be##len(val); \ +} -#define out_be32(a, v) out_arch(l, be32, a, v) -#define in_be32(a) in_arch(l, be32, a) +caam_to_cpu(16) +caam_to_cpu(32) +caam_to_cpu(64) +cpu_to_caam(16) +cpu_to_caam(32) +cpu_to_caam(64) -#define clrsetbits(type, addr, clear, set) \ - out_##type((addr), (in_##type(addr) & ~(clear)) | (set)) +static inline void wr_reg32(void __iomem *reg, u32 data) +{ + if (caam_little_end) + iowrite32(data, reg); + else + iowrite32be(data, reg); +} -#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set) -#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) -#endif +static inline u32 rd_reg32(void __iomem *reg) +{ + if (caam_little_end) + return ioread32(reg); -#ifdef __BIG_ENDIAN -#define wr_reg32(reg, data) out_be32(reg, data) -#define rd_reg32(reg) in_be32(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) out_be64(reg, data) -#define rd_reg64(reg) in_be64(reg) -#endif -#else -#ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(data, reg) -#define rd_reg32(reg) __raw_readl(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(data, reg) -#define rd_reg64(reg) __raw_readq(reg) -#endif -#endif -#endif + return ioread32be(reg); +} + +static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) +{ + if (caam_little_end) + iowrite32((ioread32(reg) & ~clear) | set, reg); + else + iowrite32be((ioread32be(reg) & ~clear) | set, reg); +} /* * The only users of these wr/rd_reg64 functions is the Job Ring (JR). @@ -123,29 +134,67 @@ * base + 0x0000 : least-significant 32 bits * base + 0x0004 : most-significant 32 bits */ +#ifdef CONFIG_64BIT +static inline void wr_reg64(void __iomem *reg, u64 data) +{ + if (caam_little_end) + iowrite64(data, reg); + else + iowrite64be(data, reg); +} -#ifndef CONFIG_64BIT -#if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \ - defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) -#define REG64_MS32(reg) ((u32 __iomem *)(reg)) -#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1) -#else -#define REG64_MS32(reg) ((u32 __iomem *)(reg) + 1) -#define REG64_LS32(reg) ((u32 __iomem *)(reg)) -#endif - -static inline void wr_reg64(u64 __iomem *reg, u64 data) +static inline u64 rd_reg64(void __iomem *reg) { - wr_reg32(REG64_MS32(reg), data >> 32); - wr_reg32(REG64_LS32(reg), data); + if (caam_little_end) + return ioread64(reg); + else + return ioread64be(reg); } -static inline u64 rd_reg64(u64 __iomem *reg) +#else /* CONFIG_64BIT */ +static inline void wr_reg64(void __iomem *reg, u64 data) { - return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | - (u64)rd_reg32(REG64_LS32(reg))); +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) { + wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); + wr_reg32((u32 __iomem *)(reg), data); + } else +#endif + { + wr_reg32((u32 __iomem *)(reg), data >> 32); + wr_reg32((u32 __iomem *)(reg) + 1, data); + } } + +static inline u64 rd_reg64(void __iomem *reg) +{ +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) + return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg))); + else #endif + return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg) + 1)); +} +#endif /* CONFIG_64BIT */ + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_SOC_IMX7D +#define cpu_to_caam_dma(value) \ + (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ + (u64)cpu_to_caam32(higher_32_bits(value))) +#define caam_dma_to_cpu(value) \ + (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ + (u64)caam32_to_cpu(higher_32_bits(value))) +#else +#define cpu_to_caam_dma(value) cpu_to_caam64(value) +#define caam_dma_to_cpu(value) caam64_to_cpu(value) +#endif /* CONFIG_SOC_IMX7D */ +#else +#define cpu_to_caam_dma(value) cpu_to_caam32(value) +#define caam_dma_to_cpu(value) caam32_to_cpu(value) +#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ /* * jr_outentry @@ -249,6 +298,8 @@ struct caam_perfmon { u32 faultliodn; /* FALR - Fault Address LIODN */ u32 faultdetail; /* FADR - Fault Addr Detail */ u32 rsvd2; +#define CSTA_PLEND BIT(10) +#define CSTA_ALT_PLEND BIT(18) u32 status; /* CSTA - CAAM Status */ u64 rsvd3; diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 2311341b73569..19dc64fede0d1 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,6 +5,8 @@ * */ +#include "regs.h" + struct sec4_sg_entry; /* @@ -13,10 +15,9 @@ struct sec4_sg_entry; static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, dma_addr_t dma, u32 len, u16 offset) { - sec4_sg_ptr->ptr = dma; - sec4_sg_ptr->len = len; - sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset & SEC4_SG_OFFSET_MASK; + sec4_sg_ptr->ptr = cpu_to_caam_dma(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); + sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK); #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -51,7 +52,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); } static inline struct sec4_sg_entry *sg_to_sec4_sg_len( From 9e217795e08a0b0f06afd121401180d756eb1b54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:33 +0300 Subject: [PATCH 010/180] crypto: caam - add ARCH_LAYERSCAPE to supported architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This basically adds support for ls1043a platform. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index d2c2909a40203..ff54c42e6e510 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -1,6 +1,6 @@ config CRYPTO_DEV_FSL_CAAM tristate "Freescale CAAM-Multicore driver backend" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). From b142f54ebf3809b9133673928d398641d692ddf0 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:36:43 +0200 Subject: [PATCH 011/180] crypto: doc - Fix typo Signed-off-by: Andrea Gelmini Signed-off-by: Herbert Xu --- Documentation/crypto/asymmetric-keys.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt index 8c07e0ea6bc0e..2b7816dea370a 100644 --- a/Documentation/crypto/asymmetric-keys.txt +++ b/Documentation/crypto/asymmetric-keys.txt @@ -76,7 +76,7 @@ the criterion string: Looking in /proc/keys, the last 8 hex digits of the key fingerprint are displayed, along with the subtype: - 1a39e171 I----- 1 perm 3f010000 0 0 asymmetri modsign.0: DSA 5acc2142 [] + 1a39e171 I----- 1 perm 3f010000 0 0 asymmetric modsign.0: DSA 5acc2142 [] ========================= From eee09d2b192926db4a35cdbecfb01ad9e769d9e6 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 14:03:38 +0200 Subject: [PATCH 012/180] crypto: powerpc - Fix typo Signed-off-by: Andrea Gelmini Signed-off-by: Herbert Xu --- arch/powerpc/crypto/aes-spe-regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h index 30d217b399c3e..2cc3a2caadaee 100644 --- a/arch/powerpc/crypto/aes-spe-regs.h +++ b/arch/powerpc/crypto/aes-spe-regs.h @@ -18,7 +18,7 @@ #define rLN r7 /* length of data to be processed */ #define rIP r8 /* potiner to IV (CBC/CTR/XTS modes) */ #define rKT r9 /* pointer to tweak key (XTS mode) */ -#define rT0 r11 /* pointers to en-/decrpytion tables */ +#define rT0 r11 /* pointers to en-/decryption tables */ #define rT1 r10 #define rD0 r9 /* data */ #define rD1 r14 From fe1c445b60d397d3298cdab22e5c9c3d886c73c7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:51 +0200 Subject: [PATCH 013/180] MAINTAINERS: Add file patterns for crypto device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Herbert Xu Cc: David S. Miller Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e37a982..3150ceb1a5268 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3280,6 +3280,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git S: Maintained F: Documentation/crypto/ +F: Documentation/devicetree/bindings/crypto/ F: Documentation/DocBook/crypto-API.tmpl F: arch/*/crypto/ F: crypto/ From f6c60b15e13344998fdc16453d01a3192817751c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:56 +0200 Subject: [PATCH 014/180] MAINTAINERS: Add file patterns for rng device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Matt Mackall Cc: Herbert Xu Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3150ceb1a5268..f4a9c13208edd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5259,6 +5259,7 @@ M: Matt Mackall M: Herbert Xu L: linux-crypto@vger.kernel.org S: Odd fixes +F: Documentation/devicetree/bindings/rng/ F: Documentation/hw_random.txt F: drivers/char/hw_random/ F: include/linux/hw_random.h From 1ff69adf747e75425d5e67df2f2ac866e7ebd165 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Thu, 26 May 2016 11:34:57 +0200 Subject: [PATCH 015/180] hwrng: stm32 - fix maybe uninitialized variable warning This patch fixes the following warning: drivers/char/hw_random/stm32-rng.c: In function 'stm32_rng_read': drivers/char/hw_random/stm32-rng.c:82:19: warning: 'sr' may be used uninitialized in this function Reported-by: Sudip Mukherjee Suggested-by: Arnd Bergmann Cc: Daniel Thompson Signed-off-by: Maxime Coquelin Reviewed-by: Daniel Thompson Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 92a810648bd00..63d84e6f18918 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -69,8 +69,12 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) } /* If error detected or data not ready... */ - if (sr != RNG_SR_DRDY) + if (sr != RNG_SR_DRDY) { + if (WARN_ONCE(sr & (RNG_SR_SEIS | RNG_SR_CEIS), + "bad RNG status - %x\n", sr)) + writel_relaxed(0, priv->base + RNG_SR); break; + } *(u32 *)data = readl_relaxed(priv->base + RNG_DR); @@ -79,10 +83,6 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) max -= sizeof(u32); } - if (WARN_ONCE(sr & (RNG_SR_SEIS | RNG_SR_CEIS), - "bad RNG status - %x\n", sr)) - writel_relaxed(0, priv->base + RNG_SR); - pm_runtime_mark_last_busy((struct device *) priv->rng.priv); pm_runtime_put_sync_autosuspend((struct device *) priv->rng.priv); From 4bdf1cfca5d46294142cdf49cd997b8b83491adf Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 12:57:50 +0200 Subject: [PATCH 016/180] lib/mpi: purge mpi_set_buffer() mpi_set_buffer() has no in-tree users and similar functionality is provided by mpi_read_raw_data(). Remove mpi_set_buffer(). Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- include/linux/mpi.h | 1 - lib/mpi/mpicoder.c | 76 --------------------------------------------- 2 files changed, 77 deletions(-) diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 3a5abe95affd3..f219559e5e800 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -80,7 +80,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign); -int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign); int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned *nbytes, int *sign); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 747606f9e4a35..c7420335a5bb7 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -250,82 +250,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) } EXPORT_SYMBOL_GPL(mpi_get_buffer); -/**************** - * Use BUFFER to update MPI. - */ -int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) -{ - const uint8_t *buffer = xbuffer, *p; - mpi_limb_t alimb; - int nlimbs; - int i; - - nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); - if (RESIZE_IF_NEEDED(a, nlimbs) < 0) - return -ENOMEM; - a->sign = sign; - - for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) { -#if BYTES_PER_MPI_LIMB == 4 - alimb = (mpi_limb_t) *p--; - alimb |= (mpi_limb_t) *p-- << 8; - alimb |= (mpi_limb_t) *p-- << 16; - alimb |= (mpi_limb_t) *p-- << 24; -#elif BYTES_PER_MPI_LIMB == 8 - alimb = (mpi_limb_t) *p--; - alimb |= (mpi_limb_t) *p-- << 8; - alimb |= (mpi_limb_t) *p-- << 16; - alimb |= (mpi_limb_t) *p-- << 24; - alimb |= (mpi_limb_t) *p-- << 32; - alimb |= (mpi_limb_t) *p-- << 40; - alimb |= (mpi_limb_t) *p-- << 48; - alimb |= (mpi_limb_t) *p-- << 56; -#else -#error please implement for this limb size. -#endif - a->d[i++] = alimb; - } - if (p >= buffer) { -#if BYTES_PER_MPI_LIMB == 4 - alimb = *p--; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 8; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 16; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 24; -#elif BYTES_PER_MPI_LIMB == 8 - alimb = (mpi_limb_t) *p--; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 8; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 16; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 24; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 32; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 40; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 48; - if (p >= buffer) - alimb |= (mpi_limb_t) *p-- << 56; -#else -#error please implement for this limb size. -#endif - a->d[i++] = alimb; - } - a->nlimbs = i; - - if (i != nlimbs) { - pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i, - nlimbs); - BUG(); - } - return 0; -} -EXPORT_SYMBOL_GPL(mpi_set_buffer); - /** * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first) * From dfd90510672f69206942546e8df4f63cfa91eae6 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 13:05:31 +0200 Subject: [PATCH 017/180] lib/mpi: mpi_read_raw_data(): purge redundant clearing of nbits In mpi_read_raw_data(), unsigned nbits is calculated as follows: nbits = nbytes * 8; and redundantly cleared later on if nbytes == 0: if (nbytes > 0) ... else nbits = 0; Purge this redundant clearing for the sake of clarity. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index c7420335a5bb7..37d6a4edb33cb 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -51,8 +51,6 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) } if (nbytes > 0) nbits -= count_leading_zeros(buffer[0]); - else - nbits = 0; nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); From eef0df6a59537032ab6b708f30b28d9530f8760e Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 13:05:32 +0200 Subject: [PATCH 018/180] lib/mpi: mpi_read_raw_data(): fix nbits calculation The number of bits, nbits, is calculated in mpi_read_raw_data() as follows: nbits = nbytes * 8; Afterwards, the number of leading zero bits of the first byte get subtracted: nbits -= count_leading_zeros(buffer[0]); However, count_leading_zeros() takes an unsigned long and thus, the u8 gets promoted to an unsigned long. Thus, the above doesn't subtract the number of leading zeros in the most significant nonzero input byte from nbits, but the number of leading zeros of the most significant nonzero input byte promoted to unsigned long, i.e. BITS_PER_LONG - 8 too many. Fix this by subtracting count_leading_zeros(...) - (BITS_PER_LONG - 8) from nbits only. Fixes: e1045992949 ("MPILIB: Provide a function to read raw data into an MPI") Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 37d6a4edb33cb..eda34aba017e1 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -50,7 +50,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) return NULL; } if (nbytes > 0) - nbits -= count_leading_zeros(buffer[0]); + nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); From 03cdfaad491e82e4a66593c6e149ddae0421df59 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 23:19:51 +0200 Subject: [PATCH 019/180] lib/mpi: mpi_read_from_buffer(): return error code mpi_read_from_buffer() reads a MPI from a buffer into a newly allocated MPI instance. It expects the buffer's leading two bytes to contain the number of bits, followed by the actual payload. On failure, it returns NULL and updates the in/out argument ret_nread somewhat inconsistently: - If the given buffer is too short to contain the leading two bytes encoding the number of bits or their value is unsupported, then ret_nread will be cleared. - If the allocation of the resulting MPI instance fails, ret_nread is left as is. The only user of mpi_read_from_buffer(), digsig_verify_rsa(), simply checks for a return value of NULL and returns -ENOMEM if that happens. While this is all of cosmetic nature only, there is another error condition which currently isn't detectable by the caller of mpi_read_from_buffer(): if the given buffer is too small to hold the number of bits as encoded in its first two bytes, the return value will be non-NULL and *ret_nread > 0. In preparation of communicating this condition to the caller, let mpi_read_from_buffer() return error values by means of the ERR_PTR() mechanism. Make the sole caller of mpi_read_from_buffer(), digsig_verify_rsa(), check the return value for IS_ERR() rather than == NULL. If IS_ERR() is true, return the associated error value rather than the fixed -ENOMEM. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/digsig.c | 12 ++++++++---- lib/mpi/mpicoder.c | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/digsig.c b/lib/digsig.c index 07be6c1ef4e25..a121cbc5a46bb 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -104,16 +104,18 @@ static int digsig_verify_rsa(struct key *key, datap = pkh->mpi; endp = ukp->data + ukp->datalen; - err = -ENOMEM; - for (i = 0; i < pkh->nmpi; i++) { unsigned int remaining = endp - datap; pkey[i] = mpi_read_from_buffer(datap, &remaining); - if (!pkey[i]) + if (IS_ERR(pkey[i])) { + err = PTR_ERR(pkey[i]); goto err; + } datap += remaining; } + err = -ENOMEM; + mblen = mpi_get_nbits(pkey[0]); mlen = DIV_ROUND_UP(mblen, 8); @@ -126,8 +128,10 @@ static int digsig_verify_rsa(struct key *key, nret = siglen; in = mpi_read_from_buffer(sig, &nret); - if (!in) + if (IS_ERR(in)) { + err = PTR_ERR(in); goto err; + } res = mpi_alloc(mpi_get_nlimbs(in) * 2); if (!res) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index eda34aba017e1..350abaf4bee73 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -86,12 +86,12 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) MPI val = NULL; if (*ret_nread < 2) - goto leave; + return ERR_PTR(-EINVAL); nbits = buffer[0] << 8 | buffer[1]; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); - goto leave; + return ERR_PTR(-EINVAL); } buffer += 2; nread = 2; @@ -100,7 +100,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) - return NULL; + return ERR_PTR(-ENOMEM); i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; val->nbits = nbits; From c5ce7c697c983693c441573d2948e0ab8d62726e Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 23:19:52 +0200 Subject: [PATCH 020/180] lib/digsig: digsig_verify_rsa(): return -EINVAL if modulo length is zero Currently, if digsig_verify_rsa() detects that the modulo's length is zero, i.e. mlen == 0, it returns -ENOMEM which doesn't really fit here. Make digsig_verify_rsa() return -EINVAL upon mlen == 0. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/digsig.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/digsig.c b/lib/digsig.c index a121cbc5a46bb..55b8b2f41a9e0 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -114,13 +114,15 @@ static int digsig_verify_rsa(struct key *key, datap += remaining; } - err = -ENOMEM; - mblen = mpi_get_nbits(pkey[0]); mlen = DIV_ROUND_UP(mblen, 8); - if (mlen == 0) + if (mlen == 0) { + err = -EINVAL; goto err; + } + + err = -ENOMEM; out1 = kzalloc(mlen, GFP_KERNEL); if (!out1) From 7af791e0f0d00c14f01ba2ffe3b6e2b50a35fc6f Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 23:19:53 +0200 Subject: [PATCH 021/180] lib/mpi: mpi_read_from_buffer(): return -EINVAL upon too short buffer Currently, if the input buffer is shorter than the expected length as indicated by its first two bytes, an MPI instance of this expected length will be allocated and filled with as much data as is available. The rest will remain uninitialized. Instead of leaving this condition undetected, an error code should be reported to the caller. Since this situation indicates that the input buffer's first two bytes, encoding the number of expected bits, are garbled, -EINVAL is appropriate here. If the input buffer is shorter than indicated by its first two bytes, make mpi_read_from_buffer() return -EINVAL. Get rid of the 'nread' variable: with the new semantics, the total number of bytes read from the input buffer is known in advance. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 350abaf4bee73..9c6f6b986682a 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -81,7 +81,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) { const uint8_t *buffer = xbuffer; int i, j; - unsigned nbits, nbytes, nlimbs, nread = 0; + unsigned nbits, nbytes, nlimbs; mpi_limb_t a; MPI val = NULL; @@ -94,9 +94,14 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) return ERR_PTR(-EINVAL); } buffer += 2; - nread = 2; nbytes = DIV_ROUND_UP(nbits, 8); + if (nbytes + 2 > *ret_nread) { + printk("MPI: mpi larger than buffer nread=%d ret_nread=%d\n", + *ret_nread + 1, *ret_nread); + return ERR_PTR(-EINVAL); + } + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) @@ -109,12 +114,6 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) for (; j > 0; j--) { a = 0; for (; i < BYTES_PER_MPI_LIMB; i++) { - if (++nread > *ret_nread) { - printk - ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n", - nread, *ret_nread); - goto leave; - } a <<= 8; a |= *buffer++; } @@ -122,8 +121,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) val->d[j - 1] = a; } -leave: - *ret_nread = nread; + *ret_nread = nbytes + 2; return val; } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); From cdf24b42c6740ec429e85a8405e5e917abac8595 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 23:19:54 +0200 Subject: [PATCH 022/180] lib/mpi: mpi_read_from_buffer(): sanitize short buffer printk The first two bytes of the input buffer encode its expected length and mpi_read_from_buffer() prints a console message if the given buffer is too short. However, there are some oddities with how this message is printed: - It is printed at the default loglevel. This is different from the one used in the case that the first two bytes' value is unsupportedly large, i.e. KERN_INFO. - The format specifier '%d' is used for unsigned ints. - It prints the values of nread and *ret_nread. This is redundant since the former is always the latter + 1. Clean this up as follows: - Use pr_info() rather than printk() with no loglevel. - Use the format specifiers '%u' in place if '%d'. - Do not print the redundant 'nread' but the more helpful 'nbytes' value. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 9c6f6b986682a..f4f9e3396f3ee 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -97,8 +97,8 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) nbytes = DIV_ROUND_UP(nbits, 8); if (nbytes + 2 > *ret_nread) { - printk("MPI: mpi larger than buffer nread=%d ret_nread=%d\n", - *ret_nread + 1, *ret_nread); + pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n", + nbytes, *ret_nread); return ERR_PTR(-EINVAL); } From 20b5b7f3c2df2fb69b3b27dc83314b8891614ba5 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 26 May 2016 23:19:55 +0200 Subject: [PATCH 023/180] lib/mpi: refactor mpi_read_from_buffer() in terms of mpi_read_raw_data() mpi_read_from_buffer() and mpi_read_raw_data() do basically the same thing except that the former extracts the number of payload bits from the first two bytes of the input buffer. Besides that, the data copying logic is exactly the same. Replace the open coded buffer to MPI instance conversion by a call to mpi_read_raw_data(). Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index f4f9e3396f3ee..823cf5f5196b3 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -80,10 +80,8 @@ EXPORT_SYMBOL_GPL(mpi_read_raw_data); MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) { const uint8_t *buffer = xbuffer; - int i, j; - unsigned nbits, nbytes, nlimbs; - mpi_limb_t a; - MPI val = NULL; + unsigned int nbits, nbytes; + MPI val; if (*ret_nread < 2) return ERR_PTR(-EINVAL); @@ -93,7 +91,6 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) pr_info("MPI: mpi too large (%u bits)\n", nbits); return ERR_PTR(-EINVAL); } - buffer += 2; nbytes = DIV_ROUND_UP(nbits, 8); if (nbytes + 2 > *ret_nread) { @@ -102,24 +99,9 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) return ERR_PTR(-EINVAL); } - nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); - val = mpi_alloc(nlimbs); + val = mpi_read_raw_data(buffer + 2, nbytes); if (!val) return ERR_PTR(-ENOMEM); - i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; - i %= BYTES_PER_MPI_LIMB; - val->nbits = nbits; - j = val->nlimbs = nlimbs; - val->sign = 0; - for (; j > 0; j--) { - a = 0; - for (; i < BYTES_PER_MPI_LIMB; i++) { - a <<= 8; - a |= *buffer++; - } - i = 0; - val->d[j - 1] = a; - } *ret_nread = nbytes + 2; return val; From f8e4f9a0a85d3589d20c4c9f20e576cc8d3c99b6 Mon Sep 17 00:00:00 2001 From: Yendapally Reddy Dhananjaya Reddy Date: Fri, 27 May 2016 06:10:38 -0400 Subject: [PATCH 024/180] dt-bindings: rng: Northstar Plus SoC rng bindings Document the bindings used by Northstar Plus(NSP) SoC random number generator. Signed-off-by: Yendapally Reddy Dhananjaya Reddy Acked-by: Eric Anholt Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/brcm,bcm2835.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt index 07ccdaa68324a..aa304d4120588 100644 --- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt +++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt @@ -2,7 +2,7 @@ BCM2835 Random number generator Required properties: -- compatible : should be "brcm,bcm2835-rng" +- compatible : should be "brcm,bcm2835-rng" or "brcm,bcm-nsp-rng" - reg : Specifies base physical address and size of the registers. Example: @@ -11,3 +11,8 @@ rng { compatible = "brcm,bcm2835-rng"; reg = <0x7e104000 0x10>; }; + +rng@18033000 { + compatible = "brcm,bcm-nsp-rng"; + reg = <0x18033000 0x14>; +}; From 422a7491459b4d989259cd94fd21f4fbde382930 Mon Sep 17 00:00:00 2001 From: Yendapally Reddy Dhananjaya Reddy Date: Fri, 27 May 2016 06:10:39 -0400 Subject: [PATCH 025/180] hwrng: bcm2835 - Support Broadcom NSP SoC rng This supports the random number generator available in NSP SoC. Masks the rng interrupt for NSP. Signed-off-by: Yendapally Reddy Dhananjaya Reddy Acked-by: Eric Anholt Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- drivers/char/hw_random/bcm2835-rng.c | 34 ++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index ac51149e97775..43d527ea90146 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -90,7 +90,7 @@ config HW_RANDOM_BCM63XX config HW_RANDOM_BCM2835 tristate "Broadcom BCM2835 Random Number Generator support" - depends on ARCH_BCM2835 + depends on ARCH_BCM2835 || ARCH_BCM_NSP default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index 7192ec25f6676..b1e8b7847e9ac 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -19,6 +19,7 @@ #define RNG_CTRL 0x0 #define RNG_STATUS 0x4 #define RNG_DATA 0x8 +#define RNG_INT_MASK 0x10 /* enable rng */ #define RNG_RBGEN 0x1 @@ -26,6 +27,18 @@ /* the initial numbers generated are "less random" so will be discarded */ #define RNG_WARMUP_COUNT 0x40000 +#define RNG_INT_OFF 0x1 + +static void __init nsp_rng_init(void __iomem *base) +{ + u32 val; + + /* mask the interrupt */ + val = readl(base + RNG_INT_MASK); + val |= RNG_INT_OFF; + writel(val, base + RNG_INT_MASK); +} + static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) { @@ -46,10 +59,18 @@ static struct hwrng bcm2835_rng_ops = { .read = bcm2835_rng_read, }; +static const struct of_device_id bcm2835_rng_of_match[] = { + { .compatible = "brcm,bcm2835-rng"}, + { .compatible = "brcm,bcm-nsp-rng", .data = nsp_rng_init}, + {}, +}; + static int bcm2835_rng_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; + void (*rng_setup)(void __iomem *base); + const struct of_device_id *rng_id; void __iomem *rng_base; int err; @@ -61,6 +82,15 @@ static int bcm2835_rng_probe(struct platform_device *pdev) } bcm2835_rng_ops.priv = (unsigned long)rng_base; + rng_id = of_match_node(bcm2835_rng_of_match, np); + if (!rng_id) + return -EINVAL; + + /* Check for rng init function, execute it */ + rng_setup = rng_id->data; + if (rng_setup) + rng_setup(rng_base); + /* set warm-up count & enable */ __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS); __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL); @@ -90,10 +120,6 @@ static int bcm2835_rng_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id bcm2835_rng_of_match[] = { - { .compatible = "brcm,bcm2835-rng", }, - {}, -}; MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match); static struct platform_driver bcm2835_rng_driver = { From 33a36a5bc0a158fa5d37ab15edc90f5928051a7e Mon Sep 17 00:00:00 2001 From: Yendapally Reddy Dhananjaya Reddy Date: Fri, 27 May 2016 06:10:40 -0400 Subject: [PATCH 026/180] ARM: dts: nsp: Add rng device tree entry Add support for the random number generator to the Northstar Plus SoC device tree. Signed-off-by: Yendapally Reddy Dhananjaya Reddy Signed-off-by: Herbert Xu --- arch/arm/boot/dts/bcm-nsp.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index def9e783b5c69..1ed829e699d42 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -206,6 +206,11 @@ brcm,nand-has-wp; }; + rng: rng@33000 { + compatible = "brcm,bcm-nsp-rng"; + reg = <0x33000 0x14>; + }; + ccbtimer0: timer@34000 { compatible = "arm,sp804"; reg = <0x34000 0x1000>; From 4f8de65bab611a68f40355ac9169edb4d0d33974 Mon Sep 17 00:00:00 2001 From: Yendapally Reddy Dhananjaya Reddy Date: Fri, 27 May 2016 06:10:41 -0400 Subject: [PATCH 027/180] hwrng: bcm2835 - Read as much data as available Read the requested number of data from the fifo Signed-off-by: Yendapally Reddy Dhananjaya Reddy Reviewed-by: Eric Anholt Signed-off-by: Herbert Xu --- drivers/char/hw_random/bcm2835-rng.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index b1e8b7847e9ac..75ca820730be8 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -43,6 +43,8 @@ static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) { void __iomem *rng_base = (void __iomem *)rng->priv; + u32 max_words = max / sizeof(u32); + u32 num_words, count; while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) { if (!wait) @@ -50,8 +52,14 @@ static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, cpu_relax(); } - *(u32 *)buf = __raw_readl(rng_base + RNG_DATA); - return sizeof(u32); + num_words = readl(rng_base + RNG_STATUS) >> 24; + if (num_words > max_words) + num_words = max_words; + + for (count = 0; count < num_words; count++) + ((u32 *)buf)[count] = readl(rng_base + RNG_DATA); + + return num_words * sizeof(u32); } static struct hwrng bcm2835_rng_ops = { From 5318c53d5b4bbf097ccba2e74831d67e4d63b1a1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 27 May 2016 13:49:40 +0200 Subject: [PATCH 028/180] crypto: s5p-sss - Use consistent indentation for variables and members Bring some consistency by: 1. Replacing fixed-space indentation of structure members with just tabs. 2. Remove indentation in declaration of local variable between type and name. Driver was mixing usage of such indentation and lack of it. When removing indentation, reorder variables in reversed-christmas-tree order with first variables being initialized ones. Signed-off-by: Krzysztof Kozlowski Acked-by: Vladimir Zapolskiy Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 80 ++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 2b3a0cfe33315..dce1af0ce85ce 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -155,43 +155,43 @@ * expansion of its usage. */ struct samsung_aes_variant { - unsigned int aes_offset; + unsigned int aes_offset; }; struct s5p_aes_reqctx { - unsigned long mode; + unsigned long mode; }; struct s5p_aes_ctx { - struct s5p_aes_dev *dev; + struct s5p_aes_dev *dev; - uint8_t aes_key[AES_MAX_KEY_SIZE]; - uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; - int keylen; + uint8_t aes_key[AES_MAX_KEY_SIZE]; + uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; + int keylen; }; struct s5p_aes_dev { - struct device *dev; - struct clk *clk; - void __iomem *ioaddr; - void __iomem *aes_ioaddr; - int irq_fc; + struct device *dev; + struct clk *clk; + void __iomem *ioaddr; + void __iomem *aes_ioaddr; + int irq_fc; - struct ablkcipher_request *req; - struct s5p_aes_ctx *ctx; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; + struct ablkcipher_request *req; + struct s5p_aes_ctx *ctx; + struct scatterlist *sg_src; + struct scatterlist *sg_dst; /* In case of unaligned access: */ - struct scatterlist *sg_src_cpy; - struct scatterlist *sg_dst_cpy; + struct scatterlist *sg_src_cpy; + struct scatterlist *sg_dst_cpy; - struct tasklet_struct tasklet; - struct crypto_queue queue; - bool busy; - spinlock_t lock; + struct tasklet_struct tasklet; + struct crypto_queue queue; + bool busy; + spinlock_t lock; - struct samsung_aes_variant *variant; + struct samsung_aes_variant *variant; }; static struct s5p_aes_dev *s5p_dev; @@ -421,11 +421,11 @@ static bool s5p_aes_rx(struct s5p_aes_dev *dev) static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; - struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - uint32_t status; - unsigned long flags; - bool set_dma_tx = false; - bool set_dma_rx = false; + struct s5p_aes_dev *dev = platform_get_drvdata(pdev); + bool set_dma_tx = false; + bool set_dma_rx = false; + unsigned long flags; + uint32_t status; spin_lock_irqsave(&dev->lock, flags); @@ -538,10 +538,10 @@ static int s5p_set_outdata_start(struct s5p_aes_dev *dev, static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) { - struct ablkcipher_request *req = dev->req; - uint32_t aes_control; - int err; - unsigned long flags; + struct ablkcipher_request *req = dev->req; + uint32_t aes_control; + unsigned long flags; + int err; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) @@ -653,10 +653,10 @@ static int s5p_aes_handle_req(struct s5p_aes_dev *dev, static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); - struct s5p_aes_dev *dev = ctx->dev; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); + struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct s5p_aes_dev *dev = ctx->dev; if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { dev_err(dev->dev, "request size is not exact amount of AES blocks\n"); @@ -671,7 +671,7 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int s5p_aes_setkey(struct crypto_ablkcipher *cipher, const uint8_t *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (keylen != AES_KEYSIZE_128 && @@ -763,11 +763,11 @@ static struct crypto_alg algs[] = { static int s5p_aes_probe(struct platform_device *pdev) { - int i, j, err = -ENODEV; - struct s5p_aes_dev *pdata; - struct device *dev = &pdev->dev; - struct resource *res; + struct device *dev = &pdev->dev; + int i, j, err = -ENODEV; struct samsung_aes_variant *variant; + struct s5p_aes_dev *pdata; + struct resource *res; if (s5p_dev) return -EEXIST; From ed494d4fa234443a005489afb6b9583415ad89ff Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 31 May 2016 13:11:57 +0200 Subject: [PATCH 029/180] crypto: drbg - reduce number of setkey calls The CTR DRBG code always set the key for each sym cipher invocation even though the key has not been changed. The patch ensures that the setkey is only invoked when a new key is generated by the DRBG. With this patch, the CTR DRBG performance increases by more than 150%. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 0a3538f6cf22b..0aca2b908c766 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -252,8 +252,10 @@ MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes192"); MODULE_ALIAS_CRYPTO("drbg_pr_ctr_aes128"); MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes128"); -static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, - unsigned char *outval, const struct drbg_string *in); +static void drbg_kcapi_symsetkey(struct drbg_state *drbg, + const unsigned char *key); +static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, + const struct drbg_string *in); static int drbg_init_sym_kernel(struct drbg_state *drbg); static int drbg_fini_sym_kernel(struct drbg_state *drbg); @@ -270,6 +272,7 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, drbg_string_fill(&data, out, drbg_blocklen(drbg)); /* 10.4.3 step 2 / 4 */ + drbg_kcapi_symsetkey(drbg, key); list_for_each_entry(curr, in, list) { const unsigned char *pos = curr->buf; size_t len = curr->len; @@ -278,7 +281,7 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, /* 10.4.3 step 4.2 */ if (drbg_blocklen(drbg) == cnt) { cnt = 0; - ret = drbg_kcapi_sym(drbg, key, out, &data); + ret = drbg_kcapi_sym(drbg, out, &data); if (ret) return ret; } @@ -290,7 +293,7 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, } /* 10.4.3 step 4.2 for last block */ if (cnt) - ret = drbg_kcapi_sym(drbg, key, out, &data); + ret = drbg_kcapi_sym(drbg, out, &data); return ret; } @@ -425,6 +428,7 @@ static int drbg_ctr_df(struct drbg_state *drbg, /* 10.4.2 step 12: overwriting of outval is implemented in next step */ /* 10.4.2 step 13 */ + drbg_kcapi_symsetkey(drbg, temp); while (generated_len < bytes_to_return) { short blocklen = 0; /* @@ -432,7 +436,7 @@ static int drbg_ctr_df(struct drbg_state *drbg, * implicit as the key is only drbg_blocklen in size based on * the implementation of the cipher function callback */ - ret = drbg_kcapi_sym(drbg, temp, X, &cipherin); + ret = drbg_kcapi_sym(drbg, X, &cipherin); if (ret) goto out; blocklen = (drbg_blocklen(drbg) < @@ -488,6 +492,7 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, ret = drbg_ctr_df(drbg, df_data, drbg_statelen(drbg), seed); if (ret) goto out; + drbg_kcapi_symsetkey(drbg, drbg->C); } drbg_string_fill(&cipherin, drbg->V, drbg_blocklen(drbg)); @@ -500,7 +505,7 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, crypto_inc(drbg->V, drbg_blocklen(drbg)); /* * 10.2.1.2 step 2.2 */ - ret = drbg_kcapi_sym(drbg, drbg->C, temp + len, &cipherin); + ret = drbg_kcapi_sym(drbg, temp + len, &cipherin); if (ret) goto out; /* 10.2.1.2 step 2.3 and 3 */ @@ -517,6 +522,7 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, /* 10.2.1.2 step 5 */ memcpy(drbg->C, temp, drbg_keylen(drbg)); + drbg_kcapi_symsetkey(drbg, drbg->C); /* 10.2.1.2 step 6 */ memcpy(drbg->V, temp + drbg_keylen(drbg), drbg_blocklen(drbg)); ret = 0; @@ -546,6 +552,7 @@ static int drbg_ctr_generate(struct drbg_state *drbg, ret = drbg_ctr_update(drbg, addtl, 2); if (ret) return 0; + drbg_kcapi_symsetkey(drbg, drbg->C); } /* 10.2.1.5.2 step 4.1 */ @@ -554,7 +561,7 @@ static int drbg_ctr_generate(struct drbg_state *drbg, while (len < buflen) { int outlen = 0; /* 10.2.1.5.2 step 4.2 */ - ret = drbg_kcapi_sym(drbg, drbg->C, drbg->scratchpad, &data); + ret = drbg_kcapi_sym(drbg, drbg->scratchpad, &data); if (ret) { len = ret; goto out; @@ -1653,13 +1660,21 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg) return 0; } -static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, - unsigned char *outval, const struct drbg_string *in) +static void drbg_kcapi_symsetkey(struct drbg_state *drbg, + const unsigned char *key) { struct crypto_cipher *tfm = (struct crypto_cipher *)drbg->priv_data; crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg))); +} + +static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, + const struct drbg_string *in) +{ + struct crypto_cipher *tfm = + (struct crypto_cipher *)drbg->priv_data; + /* there is only component in *in */ BUG_ON(in->len < drbg_blocklen(drbg)); crypto_cipher_encrypt_one(tfm, outval, in->buf); From 0a7f330c12f2b3cab53b396d4dc369c2dcf272eb Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Tue, 31 May 2016 14:42:20 -0700 Subject: [PATCH 030/180] crypto: sha1-mb - stylistic cleanup Currently there are several checkpatch warnings in the sha1_mb.c file: 'WARNING: line over 80 characters' in the sha1_mb.c file. Also, the syntax of some multi-line comments are not correct. This patch fixes these issues. Signed-off-by: Megha Dey Signed-off-by: Herbert Xu --- arch/x86/crypto/sha-mb/sha1_mb.c | 110 +++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 36 deletions(-) diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 9c5af331a956f..0a464919542ca 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -77,7 +77,8 @@ struct sha1_mb_ctx { struct mcryptd_ahash *mcryptd_tfm; }; -static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) +static inline struct mcryptd_hash_request_ctx + *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) { struct shash_desc *desc; @@ -85,7 +86,8 @@ static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct s return container_of(desc, struct mcryptd_hash_request_ctx, desc); } -static inline struct ahash_request *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +static inline struct ahash_request + *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) { return container_of((void *) ctx, struct ahash_request, __ctx); } @@ -97,10 +99,12 @@ static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, } static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); -static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *state, - struct job_sha1 *job); -static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state); -static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_submit) + (struct sha1_mb_mgr *state, struct job_sha1 *job); +static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) + (struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) + (struct sha1_mb_mgr *state); static inline void sha1_init_digest(uint32_t *digest) { @@ -131,7 +135,8 @@ static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], return i >> SHA1_LOG2_BLOCK_SIZE; } -static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, struct sha1_hash_ctx *ctx) +static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, + struct sha1_hash_ctx *ctx) { while (ctx) { if (ctx->status & HASH_CTX_STS_COMPLETE) { @@ -177,8 +182,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str ctx->job.buffer = (uint8_t *) buffer; ctx->job.len = len; - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, - &ctx->job); + ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr, + &ctx->job); continue; } } @@ -191,13 +196,15 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str if (ctx->status & HASH_CTX_STS_LAST) { uint8_t *buf = ctx->partial_block_buffer; - uint32_t n_extra_blocks = sha1_pad(buf, ctx->total_length); + uint32_t n_extra_blocks = + sha1_pad(buf, ctx->total_length); ctx->status = (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); ctx->job.buffer = buf; ctx->job.len = (uint32_t) n_extra_blocks; - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + ctx = (struct sha1_hash_ctx *) + sha1_job_mgr_submit(&mgr->mgr, &ctx->job); continue; } @@ -208,14 +215,17 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str return NULL; } -static struct sha1_hash_ctx *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) +static struct sha1_hash_ctx + *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) { /* * If get_comp_job returns NULL, there are no jobs complete. - * If get_comp_job returns a job, verify that it is safe to return to the user. + * If get_comp_job returns a job, verify that it is safe to return to + * the user. * If it is not ready, resubmit the job to finish processing. * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. - * Otherwise, all jobs currently being managed by the hash_ctx_mgr still need processing. + * Otherwise, all jobs currently being managed by the hash_ctx_mgr + * still need processing. */ struct sha1_hash_ctx *ctx; @@ -235,7 +245,10 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, int flags) { if (flags & (~HASH_ENTIRE)) { - /* User should not pass anything other than FIRST, UPDATE, or LAST */ + /* + * User should not pass anything other than FIRST, UPDATE, or + * LAST + */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -264,14 +277,20 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, ctx->partial_block_buffer_length = 0; } - /* If we made it here, there were no errors during this call to submit */ + /* + * If we made it here, there were no errors during this call to + * submit + */ ctx->error = HASH_CTX_ERROR_NONE; /* Store buffer ptr info from user */ ctx->incoming_buffer = buffer; ctx->incoming_buffer_length = len; - /* Store the user's request flags and mark this ctx as currently being processed. */ + /* + * Store the user's request flags and mark this ctx as currently + * being processed. + */ ctx->status = (flags & HASH_LAST) ? (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : HASH_CTX_STS_PROCESSING; @@ -286,8 +305,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, * append as much as possible to the extra block. */ if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { - /* Compute how many bytes to copy from user buffer into extra block */ - uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + /* + * Compute how many bytes to copy from user buffer into + * extra block + */ + uint32_t copy_len = SHA1_BLOCK_SIZE - + ctx->partial_block_buffer_length; if (len < copy_len) copy_len = len; @@ -297,20 +320,28 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, buffer, copy_len); ctx->partial_block_buffer_length += copy_len; - ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer = (const void *) + ((const char *)buffer + copy_len); ctx->incoming_buffer_length = len - copy_len; } - /* The extra block should never contain more than 1 block here */ + /* + * The extra block should never contain more than 1 block + * here + */ assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); - /* If the extra block buffer contains exactly 1 block, it can be hashed. */ + /* + * If the extra block buffer contains exactly 1 block, it can + * be hashed. + */ if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { ctx->partial_block_buffer_length = 0; ctx->job.buffer = ctx->partial_block_buffer; ctx->job.len = 1; - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + ctx = (struct sha1_hash_ctx *) + sha1_job_mgr_submit(&mgr->mgr, &ctx->job); } } @@ -329,14 +360,15 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) return NULL; /* - * If flush returned a job, resubmit the job to finish processing. + * If flush returned a job, resubmit the job to finish + * processing. */ ctx = sha1_ctx_mgr_resubmit(mgr, ctx); /* - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. - * Otherwise, all jobs currently being managed by the sha1_ctx_mgr - * still need processing. Loop. + * If sha1_ctx_mgr_resubmit returned a job, it is ready to be + * returned. Otherwise, all jobs currently being managed by the + * sha1_ctx_mgr still need processing. Loop. */ if (ctx) return ctx; @@ -394,9 +426,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, flag |= HASH_LAST; } - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(&rctx->desc); + sha_ctx = (struct sha1_hash_ctx *) + shash_desc_ctx(&rctx->desc); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, + rctx->walk.data, nbytes, flag); if (!sha_ctx) { if (flush) sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); @@ -489,7 +523,7 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(desc, struct mcryptd_hash_request_ctx, desc); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -521,7 +555,8 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, HASH_UPDATE); kernel_fpu_end(); /* check if anything is returned */ @@ -548,7 +583,7 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(desc, struct mcryptd_hash_request_ctx, desc); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -584,7 +619,8 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, flag); kernel_fpu_end(); /* check if anything is returned */ @@ -608,7 +644,7 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, static int sha1_mb_final(struct shash_desc *desc, u8 *out) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(desc, struct mcryptd_hash_request_ctx, desc); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -632,7 +668,8 @@ static int sha1_mb_final(struct shash_desc *desc, u8 *out) /* flag HASH_FINAL and 0 data size */ sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, HASH_LAST); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, + HASH_LAST); kernel_fpu_end(); /* check if anything is returned */ @@ -866,7 +903,8 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) if (time_before(cur_time, rctx->tag.expire)) break; kernel_fpu_begin(); - sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); + sha_ctx = (struct sha1_hash_ctx *) + sha1_ctx_mgr_flush(cstate->mgr); kernel_fpu_end(); if (!sha_ctx) { pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); From 3741bbb207f7b9e92ad3b878a30ccd1ddcdb8ac8 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 1 Jun 2016 11:56:02 +0300 Subject: [PATCH 031/180] crypto: omap-aes - Fix registration of algorithms Algorithms can be registered only once. So skip registration of algorithms if already registered (i.e. in case we have two AES cores in the system.) Signed-off-by: Lokesh Vutla Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index ce174d3b842ce..4a0e6a545ba26 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1185,17 +1185,19 @@ static int omap_aes_probe(struct platform_device *pdev) spin_unlock(&list_lock); for (i = 0; i < dd->pdata->algs_info_size; i++) { - for (j = 0; j < dd->pdata->algs_info[i].size; j++) { - algp = &dd->pdata->algs_info[i].algs_list[j]; + if (!dd->pdata->algs_info[i].registered) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; - pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); - err = crypto_register_alg(algp); - if (err) - goto err_algs; + err = crypto_register_alg(algp); + if (err) + goto err_algs; - dd->pdata->algs_info[i].registered++; + dd->pdata->algs_info[i].registered++; + } } } From 88aff46040a8a13ade85e8e450d781f89899e9fb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 2 Jun 2016 13:48:38 +0800 Subject: [PATCH 032/180] crypto: skcipher - remove unused header cpumask.h Remove unused header cpumask.h from crypto/ablkcipher.c. Signed-off-by: Geliang Tang Signed-off-by: Herbert Xu --- crypto/ablkcipher.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index e5b5721809e21..6b80516778c64 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include From 63044c4fd49f9e2ce8ee9c5ca24afe6ebbb9fb46 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 2 Jun 2016 13:28:55 +0100 Subject: [PATCH 033/180] crypto: hash - shrink hash down to two types Move hash to 0xe to free up the space for acomp/scomp Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- include/linux/crypto.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6e28c895c3769..d844cbc365f72 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -48,15 +48,15 @@ #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 -#define CRYPTO_ALG_TYPE_DIGEST 0x00000008 -#define CRYPTO_ALG_TYPE_HASH 0x00000008 -#define CRYPTO_ALG_TYPE_SHASH 0x00000009 -#define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d +#define CRYPTO_ALG_TYPE_DIGEST 0x0000000e +#define CRYPTO_ALG_TYPE_HASH 0x0000000e +#define CRYPTO_ALG_TYPE_SHASH 0x0000000e +#define CRYPTO_ALG_TYPE_AHASH 0x0000000f #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c +#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c #define CRYPTO_ALG_LARVAL 0x00000010 From d3ede2dba3d800912523838a6db35d562e042101 Mon Sep 17 00:00:00 2001 From: Bob Ham Date: Fri, 3 Jun 2016 12:13:07 +0100 Subject: [PATCH 034/180] hwrng: chaoskey - Add support for Araneus Alea I USB RNG Adds support for the Araneus Alea I USB hardware Random Number Generator which is interfaced with in exactly the same way as the Altus Metrum ChaosKey. We just add the appropriate device ID and modify the config help text. Signed-off-by: Bob Ham Signed-off-by: Herbert Xu --- drivers/usb/misc/Kconfig | 11 ++++++----- drivers/usb/misc/chaoskey.c | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index e9e5ae521fa63..6e705971d637f 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -260,11 +260,12 @@ config USB_CHAOSKEY tristate "ChaosKey random number generator driver support" depends on HW_RANDOM help - Say Y here if you want to connect an AltusMetrum ChaosKey to - your computer's USB port. The ChaosKey is a hardware random - number generator which hooks into the kernel entropy pool to - ensure a large supply of entropy for /dev/random and - /dev/urandom and also provides direct access via /dev/chaoskeyX + Say Y here if you want to connect an AltusMetrum ChaosKey or + Araneus Alea I to your computer's USB port. These devices + are hardware random number generators which hook into the + kernel entropy pool to ensure a large supply of entropy for + /dev/random and /dev/urandom and also provides direct access + via /dev/chaoskeyX To compile this driver as a module, choose M here: the module will be called chaoskey. diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 76350e4ee8073..9aef46b3dfa81 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -55,6 +55,9 @@ MODULE_LICENSE("GPL"); #define CHAOSKEY_VENDOR_ID 0x1d50 /* OpenMoko */ #define CHAOSKEY_PRODUCT_ID 0x60c6 /* ChaosKey */ +#define ALEA_VENDOR_ID 0x12d8 /* Araneus */ +#define ALEA_PRODUCT_ID 0x0001 /* Alea I */ + #define CHAOSKEY_BUF_LEN 64 /* max size of USB full speed packet */ #define NAK_TIMEOUT (HZ) /* stall/wait timeout for device */ @@ -69,6 +72,7 @@ MODULE_LICENSE("GPL"); static const struct usb_device_id chaoskey_table[] = { { USB_DEVICE(CHAOSKEY_VENDOR_ID, CHAOSKEY_PRODUCT_ID) }, + { USB_DEVICE(ALEA_VENDOR_ID, ALEA_PRODUCT_ID) }, { }, }; MODULE_DEVICE_TABLE(usb, chaoskey_table); From e4a886e811cd07dd5c6f389eae4d35870ec2a483 Mon Sep 17 00:00:00 2001 From: Bob Ham Date: Fri, 3 Jun 2016 12:13:08 +0100 Subject: [PATCH 035/180] hwrng: chaoskey - Fix URB warning due to timeout on Alea The first read on an Alea takes about 1.8 seconds, more than the timeout value waiting for the read. As a consequence, later URB reuse causes the warning given below. To avoid this, we increase the wait time for the first read on the Alea. [ 78.293247] WARNING: CPU: 3 PID: 1892 at drivers/usb/core/urb.c:338 usb_submit_urb+0x2b4/0x580 [usbcore] [ 78.293250] URB ffff8802135be3c0 submitted while active [ 78.293252] Modules linked in: chaoskey(+) rng_core rfcomm binfmt_misc bnep cfg80211 nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc bridge stp llc tun snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic iTCO_wdt iTCO_vendor_support nls_utf8 nls_cp437 vfat fat intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel efi_pstore kvm irqbypass pcspkr btusb btrtl btbcm btintel uvcvideo joydev bluetooth videobuf2_vmalloc videobuf2_memops efivars videobuf2_v4l2 serio_raw i2c_i801 videobuf2_core videodev cdc_mbim media lpc_ich shpchp mfd_core cdc_ncm usbnet mii cdc_wdm cdc_acm evdev snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core i915 snd_pcm snd_timer i2c_algo_bit drm_kms_helper wmi thinkpad_acpi drm nvram mei_me mei snd soundcore rfkill ac battery i2c_core [ 78.293335] video button tpm_tis tpm fuse parport_pc ppdev lp parport autofs4 ext4 crc16 jbd2 mbcache algif_skcipher af_alg hid_generic usbhid hid dm_crypt dm_mod sg sr_mod cdrom sd_mod crct10dif_pclmul crc32_pclmul crc32c_intel jitterentropy_rng sha256_generic hmac drbg aesni_intel xhci_pci aes_x86_64 ahci glue_helper xhci_hcd ehci_pci lrw libahci gf128mul ablk_helper cryptd libata sdhci_pci psmouse sdhci scsi_mod ehci_hcd mmc_core usbcore usb_common thermal [ 78.293402] CPU: 3 PID: 1892 Comm: hwrng Not tainted 4.7.0-rc1-linux-14+ #16 [ 78.293405] Hardware name: LENOVO 232577G/232577G, BIOS G2ET92WW (2.52 ) 02/22/2013 [ 78.293408] 0000000000000000 ffffffff812dfa0f ffff8801fa5b3d68 0000000000000000 [ 78.293413] ffffffff81072224 ffff8802135be3c0 ffff8801fa5b3db8 ffff880212e44210 [ 78.293418] 0000000000000040 ffff880209fb32c0 ffff880212e44200 ffffffff8107228f [ 78.293422] Call Trace: [ 78.293432] [] ? dump_stack+0x5c/0x7d [ 78.293437] [] ? __warn+0xc4/0xe0 [ 78.293441] [] ? warn_slowpath_fmt+0x4f/0x60 [ 78.293451] [] ? enqueue_task_fair+0xcd2/0x1260 [ 78.293463] [] ? usb_submit_urb+0x2b4/0x580 [usbcore] [ 78.293474] [] ? __pm_runtime_resume+0x55/0x70 [ 78.293484] [] ? _chaoskey_fill+0x132/0x250 [chaoskey] [ 78.293485] usbcore: registered new interface driver chaoskey [ 78.293493] [] ? wait_woken+0x90/0x90 [ 78.293500] [] ? devm_hwrng_register+0x80/0x80 [rng_core] [ 78.293505] [] ? chaoskey_rng_read+0x127/0x140 [chaoskey] [ 78.293511] [] ? devm_hwrng_register+0x80/0x80 [rng_core] [ 78.293515] [] ? hwrng_fillfn+0x6e/0x120 [rng_core] [ 78.293520] [] ? kthread+0xcf/0xf0 [ 78.293529] [] ? ret_from_fork+0x1f/0x40 [ 78.293535] [] ? kthread_park+0x50/0x50 Signed-off-by: Bob Ham Signed-off-by: Herbert Xu --- drivers/usb/misc/chaoskey.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 9aef46b3dfa81..6ddd08a32777c 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -60,7 +60,8 @@ MODULE_LICENSE("GPL"); #define CHAOSKEY_BUF_LEN 64 /* max size of USB full speed packet */ -#define NAK_TIMEOUT (HZ) /* stall/wait timeout for device */ +#define NAK_TIMEOUT (HZ) /* normal stall/wait timeout */ +#define ALEA_FIRST_TIMEOUT (HZ*3) /* first stall/wait timeout for Alea */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define USB_CHAOSKEY_MINOR_BASE 0 @@ -88,6 +89,7 @@ struct chaoskey { int open; /* open count */ bool present; /* device not disconnected */ bool reading; /* ongoing IO */ + bool reads_started; /* track first read for Alea */ int size; /* size of buf */ int valid; /* bytes of buf read */ int used; /* bytes of buf consumed */ @@ -192,6 +194,9 @@ static int chaoskey_probe(struct usb_interface *interface, dev->in_ep = in_ep; + if (udev->descriptor.idVendor != ALEA_VENDOR_ID) + dev->reads_started = 1; + dev->size = size; dev->present = 1; @@ -361,6 +366,7 @@ static int _chaoskey_fill(struct chaoskey *dev) { DEFINE_WAIT(wait); int result; + bool started; usb_dbg(dev->interface, "fill"); @@ -393,10 +399,17 @@ static int _chaoskey_fill(struct chaoskey *dev) goto out; } + /* The first read on the Alea takes a little under 2 seconds. + * Reads after the first read take only a few microseconds + * though. Presumably the entropy-generating circuit needs + * time to ramp up. So, we wait longer on the first read. + */ + started = dev->reads_started; + dev->reads_started = true; result = wait_event_interruptible_timeout( dev->wait_q, !dev->reading, - NAK_TIMEOUT); + (started ? NAK_TIMEOUT : ALEA_FIRST_TIMEOUT) ); if (result < 0) goto out; From 63dac35b58f4cfd524142dc54f2252b84da773de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:49 +0300 Subject: [PATCH 036/180] arm64: dts: ls1043a: add crypto node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LS1043A has a SEC v5.4 security engine. For now don't add rtic or sec_mon subnodes, since these features haven't been tested yet. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- .../boot/dts/freescale/fsl-ls1043a-rdb.dts | 4 ++ .../arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 43 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index f895fc02ab068..40846319be69f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -49,6 +49,10 @@ / { model = "LS1043A RDB Board"; + + aliases { + crypto = &crypto; + }; }; &i2c0 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index de0323b48b1e7..6bd46c133010d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -159,6 +159,49 @@ big-endian; }; + crypto: crypto@1700000 { + compatible = "fsl,sec-v5.4", "fsl,sec-v5.0", + "fsl,sec-v4.0"; + fsl,sec-era = <3>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x00 0x1700000 0x100000>; + reg = <0x00 0x1700000 0x0 0x100000>; + interrupts = <0 75 0x4>; + + sec_jr0: jr@10000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x10000 0x10000>; + interrupts = <0 71 0x4>; + }; + + sec_jr1: jr@20000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x20000 0x10000>; + interrupts = <0 72 0x4>; + }; + + sec_jr2: jr@30000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x30000 0x10000>; + interrupts = <0 73 0x4>; + }; + + sec_jr3: jr@40000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x40000 0x10000>; + interrupts = <0 74 0x4>; + }; + }; + dcfg: dcfg@1ee0000 { compatible = "fsl,ls1043a-dcfg", "syscon"; reg = <0x0 0x1ee0000 0x0 0x10000>; From b096b544d6c3cb65c53551ddedd21180f9087ab9 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:34 +0200 Subject: [PATCH 037/180] crypto: talitos - using helpers for all talitos_ptr operations Use helper for all modifications to talitos_ptr in preparation to the implementation of AEAD for SEC1 to_talitos_ptr_extent_clear() has been removed in favor of to_talitos_ptr_ext_set() to set any value and to_talitos_ptr_ext_or() to or the extent field with a value name has been shorten to help keeping single lines of 80 chars Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 59 +++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b7ee8d30147df..a92aa37f32a02 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -91,10 +91,17 @@ static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +static void to_talitos_ptr_ext_set(struct talitos_ptr *ptr, u8 val, + bool is_sec1) { if (!is_sec1) - ptr->j_extent = 0; + ptr->j_extent = val; +} + +static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent |= val; } /* @@ -111,7 +118,7 @@ static void map_single_talitos_ptr(struct device *dev, to_talitos_ptr_len(ptr, len, is_sec1); to_talitos_ptr(ptr, dma_addr, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); } /* @@ -1050,8 +1057,8 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, to_talitos_ptr(link_tbl_ptr + count, sg_dma_address(sg) + offset, 0); - link_tbl_ptr[count].len = cpu_to_be16(len); - link_tbl_ptr[count].j_extent = 0; + to_talitos_ptr_len(link_tbl_ptr + count, len, 0); + to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; offset = 0; @@ -1062,7 +1069,8 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, /* tag end of link table */ if (count > 0) - link_tbl_ptr[count - 1].j_extent = DESC_PTR_LNKTBL_RETURN; + to_talitos_ptr_ext_set(link_tbl_ptr + count - 1, + DESC_PTR_LNKTBL_RETURN, 0); return count; } @@ -1102,14 +1110,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, (areq->src == areq->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* hmac data */ - desc->ptr[1].len = cpu_to_be16(areq->assoclen); + to_talitos_ptr_len(&desc->ptr[1], areq->assoclen, 0); if (sg_count > 1 && (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, areq->assoclen, &edesc->link_tbl[tbl_off])) > 1) { to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); - desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_set(&desc->ptr[1], DESC_PTR_LNKTBL_JUMP, 0); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1117,13 +1125,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, tbl_off += ret; } else { to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - desc->ptr[1].j_extent = 0; + to_talitos_ptr_ext_set(&desc->ptr[1], 0, 0); } /* cipher iv */ to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - desc->ptr[2].len = cpu_to_be16(ivsize); - desc->ptr[2].j_extent = 0; + to_talitos_ptr_len(&desc->ptr[2], ivsize, 0); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, 0); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, @@ -1136,8 +1144,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[4], cryptlen, 0); + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, 0); sg_link_tbl_len = cryptlen; if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) @@ -1150,7 +1158,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, areq->assoclen, sg_link_tbl_len, &edesc->link_tbl[tbl_off])) > 1) { - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(&desc->ptr[4], DESC_PTR_LNKTBL_JUMP, 0); to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); @@ -1163,8 +1171,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } /* cipher out */ - desc->ptr[5].len = cpu_to_be16(cryptlen); - desc->ptr[5].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[5], cryptlen, 0); + to_talitos_ptr_ext_set(&desc->ptr[5], authsize, 0); if (areq->src != areq->dst) sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, @@ -1186,17 +1194,17 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* Add an entry to the link table for ICV data */ tbl_ptr += sg_count - 1; - tbl_ptr->j_extent = 0; + to_talitos_ptr_ext_set(tbl_ptr, 0, 0); tbl_ptr++; - tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; - tbl_ptr->len = cpu_to_be16(authsize); + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, 0); + to_talitos_ptr_len(tbl_ptr, authsize, 0); /* icv data follows link tables */ to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + (edesc->src_nents + edesc->dst_nents + 2) * sizeof(struct talitos_ptr) + authsize, 0); - desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(&desc->ptr[5], DESC_PTR_LNKTBL_JUMP, 0); dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1493,7 +1501,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, len, DMA_TO_DEVICE); } } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); @@ -1504,7 +1512,8 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, &edesc->link_tbl[0]); if (sg_count > 1) { to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, + 0); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -1544,7 +1553,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, len, DMA_FROM_DEVICE); } } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); if (sg_count == 1) { to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); @@ -1555,7 +1564,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, to_talitos_ptr(ptr, edesc->dma_link_tbl + (edesc->src_nents + 1) * sizeof(struct talitos_ptr), 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -1586,7 +1595,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); - to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[1], 0, is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, From 246a87cda070a708361cee4309748bcb2cb6121d Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:36 +0200 Subject: [PATCH 038/180] crypto: talitos - move mapping helpers before IPSEC functions In order to be able to use the mapping/unmapping helpers for IPSEC it needs to be move upper in the file Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 244 +++++++++++++++++++-------------------- 1 file changed, 122 insertions(+), 122 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index a92aa37f32a02..beb369e557b90 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -926,6 +926,33 @@ static void talitos_sg_unmap(struct device *dev, dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); } +static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, + struct scatterlist *dst, unsigned int len, + struct talitos_edesc *edesc) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (is_sec1) { + if (!edesc->src_nents) { + dma_unmap_sg(dev, src, 1, + dst != src ? DMA_TO_DEVICE + : DMA_BIDIRECTIONAL); + } + if (dst && edesc->dst_nents) { + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, + edesc->buf + len, len); + } else if (dst && dst != src) { + dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); + } + } else { + talitos_sg_unmap(dev, edesc, src, dst); + } +} + static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) @@ -1083,6 +1110,101 @@ static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, link_tbl_ptr); } +int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, struct talitos_ptr *ptr) +{ + int sg_count; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + sg_count = edesc->src_nents ? : 1; + + if (sg_count == 1) { + dma_map_sg(dev, src, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_copy_to_buffer(src, sg_count, edesc->buf, len); + to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + len, DMA_TO_DEVICE); + } + } else { + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_count = sg_to_link_tbl(src, sg_count, len, + &edesc->link_tbl[0]); + if (sg_count > 1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, + 0); + dma_sync_single_for_device(dev, + edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed*/ + to_talitos_ptr(ptr, sg_dma_address(src), + is_sec1); + } + } + } + return sg_count; +} + +void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, + struct talitos_ptr *ptr, int sg_count) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (dir != DMA_NONE) + sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + if (sg_count == 1) { + if (dir != DMA_NONE) + dma_map_sg(dev, dst, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + } + } else { + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[edesc->src_nents + 1]; + + to_talitos_ptr(ptr, edesc->dma_link_tbl + + (edesc->src_nents + 1) * + sizeof(struct talitos_ptr), 0); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); + sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } + } +} + /* * fill in and submit ipsec_esp descriptor */ @@ -1420,33 +1542,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); - } -} - static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) @@ -1478,101 +1573,6 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) -{ - int sg_count; - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, - 0); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } - } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } - } -} - static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, From 6a1e8d14156d4afb6a835d97170d181ea6b5238c Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:38 +0200 Subject: [PATCH 039/180] crypto: talitos - making mapping helpers more generic In preparation of IPSEC for SEC1, first step is to make the mapping helpers more generic so that they can also be used by AEAD functions. First, the functions are moved before IPSEC functions in talitos.c talitos_sg_unmap() and unmap_sg_talitos_ptr() are merged as they are quite similar, the second one handling the SEC1 case an calling the first one for SEC2 map_sg_in_talitos_ptr() and map_sg_out_talitos_ptr() are merged into talitos_sg_map() and enhenced to support offseted zones as used for AEAD. The actual mapping is now performed outside that helper. The DMA sync is also done outside to not make it several times. talitos_edesc_alloc() size calculation are fixed to also take into account AEAD specific parts also for SEC1 Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 230 ++++++++++++++++----------------------- 1 file changed, 93 insertions(+), 137 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index beb369e557b90..b0d3c24f7f79e 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -911,45 +911,28 @@ struct talitos_edesc { static void talitos_sg_unmap(struct device *dev, struct talitos_edesc *edesc, struct scatterlist *src, - struct scatterlist *dst) + struct scatterlist *dst, + unsigned int len, unsigned int offset) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unsigned int src_nents = edesc->src_nents ? : 1; unsigned int dst_nents = edesc->dst_nents ? : 1; + if (is_sec1 && dst && dst_nents > 1) { + dma_sync_single_for_device(dev, edesc->dma_link_tbl + offset, + len, DMA_FROM_DEVICE); + sg_pcopy_from_buffer(dst, dst_nents, edesc->buf + offset, len, + offset); + } if (src != dst) { - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + if (src_nents == 1 || !is_sec1) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (dst) { + if (dst && (dst_nents == 1 || !is_sec1)) dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } - } else + } else if (src_nents == 1 || !is_sec1) { dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); -} - -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); } } @@ -962,7 +945,8 @@ static void ipsec_esp_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen, + areq->assoclen); if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -1110,99 +1094,37 @@ static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, link_tbl_ptr); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) +int talitos_sg_map(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + struct talitos_ptr *ptr, + int sg_count, unsigned int offset, int tbl_off) { - int sg_count; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, - 0); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src) + offset, is_sec1); + return sg_count; } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, is_sec1); + return sg_count; } + sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, + &edesc->link_tbl[tbl_off]); + if (sg_count == 1) { + /* Only one segment now, so no link tbl needed*/ + copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); + return sg_count; + } + to_talitos_ptr(ptr, edesc->dma_link_tbl + + tbl_off * sizeof(struct talitos_ptr), is_sec1); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); + + return sg_count; } /* @@ -1363,7 +1285,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, bool encrypt) { struct talitos_edesc *edesc; - int src_nents, dst_nents, alloc_len, dma_len; + int src_nents, dst_nents, alloc_len, dma_len, src_len, dst_len; dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -1381,8 +1303,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); if (!dst || dst == src) { - src_nents = sg_nents_for_len(src, - assoclen + cryptlen + authsize); + src_len = assoclen + cryptlen + authsize; + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); @@ -1390,17 +1312,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, } src_nents = (src_nents == 1) ? 0 : src_nents; dst_nents = dst ? src_nents : 0; + dst_len = 0; } else { /* dst && dst != src*/ - src_nents = sg_nents_for_len(src, assoclen + cryptlen + - (encrypt ? 0 : authsize)); + src_len = assoclen + cryptlen + (encrypt ? 0 : authsize); + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); goto error_sg; } src_nents = (src_nents == 1) ? 0 : src_nents; - dst_nents = sg_nents_for_len(dst, assoclen + cryptlen + - (encrypt ? authsize : 0)); + dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); + dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); err = ERR_PTR(-EINVAL); @@ -1417,8 +1340,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, alloc_len = sizeof(struct talitos_edesc); if (src_nents || dst_nents) { if (is_sec1) - dma_len = (src_nents ? cryptlen : 0) + - (dst_nents ? cryptlen : 0); + dma_len = (src_nents ? src_len : 0) + + (dst_nents ? dst_len : 0); else dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + authsize * 2; @@ -1548,7 +1471,7 @@ static void common_nonsnoop_unmap(struct device *dev, { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); @@ -1586,6 +1509,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); @@ -1601,19 +1525,33 @@ static int common_nonsnoop(struct talitos_edesc *edesc, map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, (char *)&ctx->key, DMA_TO_DEVICE); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* * cipher in */ - sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, - (areq->src == areq->dst) ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - &desc->ptr[3]); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* cipher out */ - map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, - (areq->src == areq->dst) ? DMA_NONE - : DMA_FROM_DEVICE, - &desc->ptr[4], sg_count); + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } + + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[4], + sg_count, 0, (edesc->src_nents + 1)); + if (ret > 1) + sync_needed = true; /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, @@ -1622,6 +1560,10 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_unmap(dev, edesc, areq); @@ -1685,7 +1627,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0); /* When using hashctx-in, must unmap it. */ if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) @@ -1756,8 +1698,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; int ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int sg_count; /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1782,11 +1726,19 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, else desc->ptr[2] = zero_entry; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + else + sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, + DMA_TO_DEVICE); /* * data in */ - map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, - DMA_TO_DEVICE, &desc->ptr[3]); + sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1807,6 +1759,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); From 549bd8bc598759607b5ffc2eb052a3c99a85552e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:40 +0200 Subject: [PATCH 040/180] crypto: talitos - Implement AEAD for SEC1 using HMAC_SNOOP_NO_AFEU This patchs enhances the IPSEC_ESP related functions for them to also supports the same operations with descriptor type HMAC_SNOOP_NO_AFEU. The differences between the two descriptor types are: * pointeurs 2 and 3 are swaped (Confidentiality key and Primary EU Context IN) * HMAC_SNOOP_NO_AFEU has CICV out in pointer 6 * HMAC_SNOOP_NO_AFEU has no primary EU context out so we get it from the end of data out Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 209 +++++++++++++++++++++++---------------- 1 file changed, 124 insertions(+), 85 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b0d3c24f7f79e..4ff03c3f62cd3 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -940,7 +940,13 @@ static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) { - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + + if (edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], + DMA_FROM_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[3], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); @@ -951,6 +957,13 @@ static void ipsec_esp_unmap(struct device *dev, if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + if (!(edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + unsigned int dst_nents = edesc->dst_nents ? : 1; + + sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize, + areq->assoclen + areq->cryptlen - ivsize); + } } /* @@ -960,6 +973,8 @@ static void ipsec_esp_encrypt_done(struct device *dev, struct talitos_desc *desc, void *context, int err) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); struct aead_request *areq = context; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); unsigned int authsize = crypto_aead_authsize(authenc); @@ -973,8 +988,11 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->icv_ool) { - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; + if (is_sec1) + icvdata = edesc->buf + areq->assoclen + areq->cryptlen; + else + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - authsize, icvdata, authsize); @@ -995,6 +1013,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, struct talitos_edesc *edesc; struct scatterlist *sg; char *oicv, *icv; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); edesc = container_of(desc, struct talitos_edesc, desc); @@ -1006,7 +1026,12 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, icv = (char *)sg_virt(sg) + sg->length - authsize; if (edesc->dma_len) { - oicv = (char *)&edesc->link_tbl[edesc->src_nents + + if (is_sec1) + oicv = (char *)&edesc->dma_link_tbl + + req->assoclen + req->cryptlen; + else + oicv = (char *) + &edesc->link_tbl[edesc->src_nents + edesc->dst_nents + 2]; if (edesc->icv_ool) icv = oicv + authsize; @@ -1145,42 +1170,52 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, int tbl_off = 0; int sg_count, ret; int sg_link_tbl_len; + bool sync_needed = false; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, DMA_TO_DEVICE); - sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE); - /* hmac data */ - to_talitos_ptr_len(&desc->ptr[1], areq->assoclen, 0); - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, - areq->assoclen, - &edesc->link_tbl[tbl_off])) > 1) { - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr), 0); - to_talitos_ptr_ext_set(&desc->ptr[1], DESC_PTR_LNKTBL_JUMP, 0); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + areq->assoclen + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + /* hmac data */ + ret = talitos_sg_map(dev, areq->src, areq->assoclen, edesc, + &desc->ptr[1], sg_count, 0, tbl_off); + if (ret > 1) { tbl_off += ret; - } else { - to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - to_talitos_ptr_ext_set(&desc->ptr[1], 0, 0); + sync_needed = true; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - to_talitos_ptr_len(&desc->ptr[2], ivsize, 0); - to_talitos_ptr_ext_set(&desc->ptr[2], 0, 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[2], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, is_sec1); + } else { + to_talitos_ptr(&desc->ptr[3], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[3], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[3], 0, is_sec1); + } /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, - DMA_TO_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); /* * cipher in @@ -1188,78 +1223,82 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - to_talitos_ptr_len(&desc->ptr[4], cryptlen, 0); - to_talitos_ptr_ext_set(&desc->ptr[4], authsize, 0); + to_talitos_ptr_len(&desc->ptr[4], cryptlen, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[4], 0, is_sec1); sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) - sg_link_tbl_len += authsize; - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + - areq->assoclen, 0); - } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, - areq->assoclen, sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > - 1) { - to_talitos_ptr_ext_or(&desc->ptr[4], DESC_PTR_LNKTBL_JUMP, 0); - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - tbl_off * - sizeof(struct talitos_ptr), 0); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - tbl_off += ret; - } else { - copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1); + + if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + sg_link_tbl_len += authsize; } - /* cipher out */ - to_talitos_ptr_len(&desc->ptr[5], cryptlen, 0); - to_talitos_ptr_ext_set(&desc->ptr[5], authsize, 0); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[4], sg_count, areq->assoclen, + tbl_off); + + if (sg_count > 1) { + tbl_off += sg_count; + sync_needed = true; + } - if (areq->src != areq->dst) - sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, - DMA_FROM_DEVICE); + /* cipher out */ + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } - edesc->icv_ool = false; + sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, + &desc->ptr[5], sg_count, areq->assoclen, + tbl_off); - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + - areq->assoclen, 0); - } else if ((sg_count = - sg_to_link_tbl_offset(areq->dst, sg_count, - areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > 1) { - struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; - - to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), 0); - - /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - to_talitos_ptr_ext_set(tbl_ptr, 0, 0); - tbl_ptr++; - to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, 0); - to_talitos_ptr_len(tbl_ptr, authsize, 0); - - /* icv data follows link tables */ - to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + - (edesc->src_nents + edesc->dst_nents + - 2) * sizeof(struct talitos_ptr) + - authsize, 0); - to_talitos_ptr_ext_or(&desc->ptr[5], DESC_PTR_LNKTBL_JUMP, 0); - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); + if (sg_count > 1) { edesc->icv_ool = true; + sync_needed = true; + + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; + int offset = (edesc->src_nents + edesc->dst_nents + 2) * + sizeof(struct talitos_ptr) + authsize; + + /* Add an entry to the link table for ICV data */ + tbl_ptr += sg_count - 1; + to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); + tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, + is_sec1); + to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); + + /* icv data follows link tables */ + to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, + is_sec1); + } } else { - copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + edesc->icv_ool = false; + } + + /* ICV data */ + if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + + areq->assoclen + cryptlen, is_sec1); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, - DMA_FROM_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, + DMA_FROM_DEVICE); + + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { From e968b49f4d57a28d5e6ada6e92d4937ca0743793 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:42 +0200 Subject: [PATCH 041/180] crypto: talitos - sg_to_link_tbl() not used anymore, remove it Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 4ff03c3f62cd3..ff8cf39dfc631 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1111,14 +1111,6 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, return count; } -static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, - int cryptlen, - struct talitos_ptr *link_tbl_ptr) -{ - return sg_to_link_tbl_offset(sg, sg_count, 0, cryptlen, - link_tbl_ptr); -} - int talitos_sg_map(struct device *dev, struct scatterlist *src, unsigned int len, struct talitos_edesc *edesc, struct talitos_ptr *ptr, From b00577639d1970c8075d70f6180be83574712855 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:44 +0200 Subject: [PATCH 042/180] crypto: talitos - implement cra_priority SEC1 doesn't have IPSEC_ESP descriptor type but it is able to perform IPSEC using HMAC_SNOOP_NO_AFEU, which is also existing on SEC2 In order to be able to define descriptors templates for SEC1 without breaking SEC2+, we have to give lower priority to HMAC_SNOOP_NO_AFEU so that SEC2+ selects IPSEC_ESP and not HMAC_SNOOP_NO_AFEU which is less performant. This is done by adding a priority field in the template. If the field is 0, we use the default priority, otherwise we used the one in the field. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index ff8cf39dfc631..dfd3a93e49546 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2120,6 +2120,7 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, struct talitos_alg_template { u32 type; + u32 priority; union { struct crypto_alg crypto; struct ahash_alg hash; @@ -2897,7 +2898,10 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, } alg->cra_module = THIS_MODULE; - alg->cra_priority = TALITOS_CRA_PRIORITY; + if (t_alg->algt.priority) + alg->cra_priority = t_alg->algt.priority; + else + alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY; From 7405c8d7ff979f5a5baa79006b8d2704e316f581 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:46 +0200 Subject: [PATCH 043/180] crypto: talitos - templates for AEAD using HMAC_SNOOP_NO_AFEU This will allow IPSEC on SEC1 Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 180 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index dfd3a93e49546..0418a2f41dc08 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -811,6 +811,11 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 +/* + * Defines a priority for doing AEAD with descriptors type + * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP + */ +#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1) #define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ @@ -2151,6 +2156,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2173,6 +2199,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2193,6 +2242,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2215,6 +2285,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2235,6 +2328,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2257,6 +2371,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2361,6 +2498,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2382,6 +2540,28 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, /* ABLKCIPHER algorithms. */ { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .alg.crypto = { From 773b197972bebcbd9eb3cd4d2688a68619eef85c Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Wed, 8 Jun 2016 02:47:47 +0530 Subject: [PATCH 044/180] crypto: qat - Remove deprecated create_workqueue alloc_workqueue replaces deprecated create_workqueue(). The workqueue device_reset_wq has workitem &reset_data->reset_work per adf_reset_dev_data. The workqueue pf2vf_resp_wq is a workqueue for PF2VF responses has workitem &pf2vf_resp->pf2vf_resp_work per pf2vf_resp. The workqueue adf_vf_stop_wq is used to call adf_dev_stop() asynchronously. Dedicated workqueues have been used in all cases since the workitems on the workqueues are involved in operation of crypto which can be used in the IO path which is depended upon during memory reclaim. Hence, WQ_MEM_RECLAIM has been set to gurantee forward progress under memory pressure. Since there are only a fixed number of work items, explicit concurrency limit is unnecessary. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_aer.c | 3 ++- drivers/crypto/qat/qat_common/adf_sriov.c | 2 +- drivers/crypto/qat/qat_common/adf_vf_isr.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index b40d9c8dad964..7bfb57f1bcb52 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -243,7 +243,8 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); int adf_init_aer(void) { - device_reset_wq = create_workqueue("qat_device_reset_wq"); + device_reset_wq = alloc_workqueue("qat_device_reset_wq", + WQ_MEM_RECLAIM, 0); return !device_reset_wq ? -EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 4a526e2f1d7ff..9320ae1d005bb 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(adf_sriov_configure); int __init adf_init_pf_wq(void) { /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + pf2vf_resp_wq = alloc_workqueue("qat_pf2vf_resp_wq", WQ_MEM_RECLAIM, 0); return !pf2vf_resp_wq ? -ENOMEM : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index aa689cabedb4b..bf99e11a3403d 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -321,7 +321,7 @@ EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc); int __init adf_init_vf_wq(void) { - adf_vf_stop_wq = create_workqueue("adf_vf_stop_wq"); + adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0); return !adf_vf_stop_wq ? -EFAULT : 0; } From 5ad67c14510b74f3b705bbe6413287a0943e5800 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 8 Jun 2016 19:31:09 +0100 Subject: [PATCH 045/180] hwrng: exynos - fixup IO accesors The __raw IO functions are not endian safe, so use the readl_relaxed and writel_relaxed versions of these. Signed-off-by: Ben Dooks Signed-off-by: Herbert Xu --- drivers/char/hw_random/exynos-rng.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c index ed44561ea6475..23d358553b21f 100644 --- a/drivers/char/hw_random/exynos-rng.c +++ b/drivers/char/hw_random/exynos-rng.c @@ -45,12 +45,12 @@ struct exynos_rng { static u32 exynos_rng_readl(struct exynos_rng *rng, u32 offset) { - return __raw_readl(rng->mem + offset); + return readl_relaxed(rng->mem + offset); } static void exynos_rng_writel(struct exynos_rng *rng, u32 val, u32 offset) { - __raw_writel(val, rng->mem + offset); + writel_relaxed(val, rng->mem + offset); } static int exynos_rng_configure(struct exynos_rng *exynos_rng) From e123be16120df6cdda4e1811a49326125f0aa4dd Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 10 Jun 2016 10:21:53 +0200 Subject: [PATCH 046/180] hwrng: meson - Add Amlogic Meson Hardware Random Generator Add support for the Amlogic Meson SoCs hardware random generator. Signed-off-by: Neil Armstrong Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 14 +++ drivers/char/hw_random/Makefile | 1 + drivers/char/hw_random/meson-rng.c | 131 +++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 drivers/char/hw_random/meson-rng.c diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 43d527ea90146..51401100466ba 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -396,6 +396,20 @@ config HW_RANDOM_PIC32 If unsure, say Y. +config HW_RANDOM_MESON + tristate "Amlogic Meson Random Number Generator support" + depends on HW_RANDOM + depends on ARCH_MESON || COMPILE_TEST + default y + ---help--- + This driver provides kernel-side support for the Random Number + Generator hardware found on Amlogic Meson SoCs. + + To compile this driver as a module, choose M here. the + module will be called meson-rng. + + If unsure, say Y. + endif # HW_RANDOM config UML_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 63022b49f160a..04bb0b03356ff 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_HW_RANDOM_ST) += st-rng.o obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o +obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o diff --git a/drivers/char/hw_random/meson-rng.c b/drivers/char/hw_random/meson-rng.c new file mode 100644 index 0000000000000..0cfd81bcaeacf --- /dev/null +++ b/drivers/char/hw_random/meson-rng.c @@ -0,0 +1,131 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (c) 2016 BayLibre, SAS. + * Author: Neil Armstrong + * Copyright (C) 2014 Amlogic, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * BSD LICENSE + * + * Copyright (c) 2016 BayLibre, SAS. + * Author: Neil Armstrong + * Copyright (C) 2014 Amlogic, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define RNG_DATA 0x00 + +struct meson_rng_data { + void __iomem *base; + struct platform_device *pdev; + struct hwrng rng; +}; + +static int meson_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) +{ + struct meson_rng_data *data = + container_of(rng, struct meson_rng_data, rng); + + if (max < sizeof(u32)) + return 0; + + *(u32 *)buf = readl_relaxed(data->base + RNG_DATA); + + return sizeof(u32); +} + +static int meson_rng_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct meson_rng_data *data; + struct resource *res; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->pdev = pdev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->base = devm_ioremap_resource(dev, res); + if (IS_ERR(data->base)) + return PTR_ERR(data->base); + + data->rng.name = pdev->name; + data->rng.read = meson_rng_read; + + platform_set_drvdata(pdev, data); + + return devm_hwrng_register(dev, &data->rng); +} + +static const struct of_device_id meson_rng_of_match[] = { + { .compatible = "amlogic,meson-rng", }, + {}, +}; + +static struct platform_driver meson_rng_driver = { + .probe = meson_rng_probe, + .driver = { + .name = "meson-rng", + .of_match_table = meson_rng_of_match, + }, +}; + +module_platform_driver(meson_rng_driver); + +MODULE_ALIAS("platform:meson-rng"); +MODULE_DESCRIPTION("Meson H/W Random Number Generator driver"); +MODULE_AUTHOR("Lawrence Mok "); +MODULE_AUTHOR("Neil Armstrong "); +MODULE_LICENSE("Dual BSD/GPL"); From 355912852115cd8aa4ad02c25182ae615ce925fb Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 14 Jun 2016 07:34:13 +0200 Subject: [PATCH 047/180] crypto: drbg - use CTR AES instead of ECB AES The CTR DRBG derives its random data from the CTR that is encrypted with AES. This patch now changes the CTR DRBG implementation such that the CTR AES mode is employed. This allows the use of steamlined CTR AES implementation such as ctr-aes-aesni. Unfortunately there are the following subtile changes we need to apply when using the CTR AES mode: - the CTR mode increments the counter after the cipher operation, but the CTR DRBG requires the increment before the cipher op. Hence, the crypto_inc is applied to the counter (drbg->V) once it is recalculated. - the CTR mode wants to encrypt data, but the CTR DRBG is interested in the encrypted counter only. The full CTR mode is the XOR of the encrypted counter with the plaintext data. To access the encrypted counter, the patch uses a NULL data vector as plaintext to be "encrypted". Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/drbg.c | 193 ++++++++++++++++++++++++++++++------------ include/crypto/drbg.h | 9 ++ 3 files changed, 149 insertions(+), 54 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 1d33beb6a1ae5..c903f1832f2c7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1567,6 +1567,7 @@ config CRYPTO_DRBG_HASH config CRYPTO_DRBG_CTR bool "Enable CTR DRBG" select CRYPTO_AES + depends on CRYPTO_CTR help Enable the CTR DRBG variant as defined in NIST SP800-90A. diff --git a/crypto/drbg.c b/crypto/drbg.c index 0aca2b908c766..4ee1a9c794200 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -258,6 +258,7 @@ static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, const struct drbg_string *in); static int drbg_init_sym_kernel(struct drbg_state *drbg); static int drbg_fini_sym_kernel(struct drbg_state *drbg); +static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *outbuf, u32 outlen); /* BCC function for CTR DRBG as defined in 10.4.3 */ static int drbg_ctr_bcc(struct drbg_state *drbg, @@ -482,36 +483,37 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, drbg_blocklen(drbg); unsigned char *temp_p, *df_data_p; /* pointer to iterate over buffers */ unsigned int len = 0; - struct drbg_string cipherin; if (3 > reseed) memset(df_data, 0, drbg_statelen(drbg)); - /* 10.2.1.3.2 step 2 and 10.2.1.4.2 step 2 */ - if (seed) { - ret = drbg_ctr_df(drbg, df_data, drbg_statelen(drbg), seed); + if (!reseed) { + /* + * The DRBG uses the CTR mode of the underlying AES cipher. The + * CTR mode increments the counter value after the AES operation + * but SP800-90A requires that the counter is incremented before + * the AES operation. Hence, we increment it at the time we set + * it by one. + */ + crypto_inc(drbg->V, drbg_blocklen(drbg)); + + ret = crypto_skcipher_setkey(drbg->ctr_handle, drbg->C, + drbg_keylen(drbg)); if (ret) goto out; - drbg_kcapi_symsetkey(drbg, drbg->C); } - drbg_string_fill(&cipherin, drbg->V, drbg_blocklen(drbg)); - /* - * 10.2.1.3.2 steps 2 and 3 are already covered as the allocation - * zeroizes all memory during initialization - */ - while (len < (drbg_statelen(drbg))) { - /* 10.2.1.2 step 2.1 */ - crypto_inc(drbg->V, drbg_blocklen(drbg)); - /* - * 10.2.1.2 step 2.2 */ - ret = drbg_kcapi_sym(drbg, temp + len, &cipherin); + /* 10.2.1.3.2 step 2 and 10.2.1.4.2 step 2 */ + if (seed) { + ret = drbg_ctr_df(drbg, df_data, drbg_statelen(drbg), seed); if (ret) goto out; - /* 10.2.1.2 step 2.3 and 3 */ - len += drbg_blocklen(drbg); } + ret = drbg_kcapi_sym_ctr(drbg, temp, drbg_statelen(drbg)); + if (ret) + return ret; + /* 10.2.1.2 step 4 */ temp_p = temp; df_data_p = df_data; @@ -522,9 +524,14 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, /* 10.2.1.2 step 5 */ memcpy(drbg->C, temp, drbg_keylen(drbg)); - drbg_kcapi_symsetkey(drbg, drbg->C); + ret = crypto_skcipher_setkey(drbg->ctr_handle, drbg->C, + drbg_keylen(drbg)); + if (ret) + goto out; /* 10.2.1.2 step 6 */ memcpy(drbg->V, temp + drbg_keylen(drbg), drbg_blocklen(drbg)); + /* See above: increment counter by one to compensate timing of CTR op */ + crypto_inc(drbg->V, drbg_blocklen(drbg)); ret = 0; out: @@ -543,46 +550,26 @@ static int drbg_ctr_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl) { - int len = 0; - int ret = 0; - struct drbg_string data; + int ret; + int len = min_t(int, buflen, INT_MAX); /* 10.2.1.5.2 step 2 */ if (addtl && !list_empty(addtl)) { ret = drbg_ctr_update(drbg, addtl, 2); if (ret) return 0; - drbg_kcapi_symsetkey(drbg, drbg->C); } /* 10.2.1.5.2 step 4.1 */ - crypto_inc(drbg->V, drbg_blocklen(drbg)); - drbg_string_fill(&data, drbg->V, drbg_blocklen(drbg)); - while (len < buflen) { - int outlen = 0; - /* 10.2.1.5.2 step 4.2 */ - ret = drbg_kcapi_sym(drbg, drbg->scratchpad, &data); - if (ret) { - len = ret; - goto out; - } - outlen = (drbg_blocklen(drbg) < (buflen - len)) ? - drbg_blocklen(drbg) : (buflen - len); - /* 10.2.1.5.2 step 4.3 */ - memcpy(buf + len, drbg->scratchpad, outlen); - len += outlen; - /* 10.2.1.5.2 step 6 */ - if (len < buflen) - crypto_inc(drbg->V, drbg_blocklen(drbg)); - } + ret = drbg_kcapi_sym_ctr(drbg, buf, len); + if (ret) + return ret; /* 10.2.1.5.2 step 6 */ ret = drbg_ctr_update(drbg, NULL, 3); if (ret) len = ret; -out: - memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); return len; } @@ -1634,10 +1621,46 @@ static int drbg_kcapi_hash(struct drbg_state *drbg, unsigned char *outval, #endif /* (CONFIG_CRYPTO_DRBG_HASH || CONFIG_CRYPTO_DRBG_HMAC) */ #ifdef CONFIG_CRYPTO_DRBG_CTR +static int drbg_fini_sym_kernel(struct drbg_state *drbg) +{ + struct crypto_cipher *tfm = + (struct crypto_cipher *)drbg->priv_data; + if (tfm) + crypto_free_cipher(tfm); + drbg->priv_data = NULL; + + if (drbg->ctr_handle) + crypto_free_skcipher(drbg->ctr_handle); + drbg->ctr_handle = NULL; + + if (drbg->ctr_req) + skcipher_request_free(drbg->ctr_req);; + drbg->ctr_req = NULL; + + kfree(drbg->ctr_null_value_buf); + drbg->ctr_null_value = NULL; + + return 0; +} + +static void drbg_skcipher_cb(struct crypto_async_request *req, int error) +{ + struct drbg_state *drbg = req->data; + + if (error == -EINPROGRESS) + return; + drbg->ctr_async_err = error; + complete(&drbg->ctr_completion); +} + +#define DRBG_CTR_NULL_LEN 128 static int drbg_init_sym_kernel(struct drbg_state *drbg) { - int ret = 0; struct crypto_cipher *tfm; + struct crypto_skcipher *sk_tfm; + struct skcipher_request *req; + unsigned int alignmask; + char ctr_name[CRYPTO_MAX_ALG_NAME]; tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0); if (IS_ERR(tfm)) { @@ -1647,16 +1670,41 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg) } BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm)); drbg->priv_data = tfm; - return ret; -} -static int drbg_fini_sym_kernel(struct drbg_state *drbg) -{ - struct crypto_cipher *tfm = - (struct crypto_cipher *)drbg->priv_data; - if (tfm) - crypto_free_cipher(tfm); - drbg->priv_data = NULL; + if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", + drbg->core->backend_cra_name) >= CRYPTO_MAX_ALG_NAME) { + drbg_fini_sym_kernel(drbg); + return -EINVAL; + } + sk_tfm = crypto_alloc_skcipher(ctr_name, 0, 0); + if (IS_ERR(sk_tfm)) { + pr_info("DRBG: could not allocate CTR cipher TFM handle: %s\n", + ctr_name); + drbg_fini_sym_kernel(drbg); + return PTR_ERR(sk_tfm); + } + drbg->ctr_handle = sk_tfm; + + req = skcipher_request_alloc(sk_tfm, GFP_KERNEL); + if (!req) { + pr_info("DRBG: could not allocate request queue\n"); + drbg_fini_sym_kernel(drbg); + return PTR_ERR(req); + } + drbg->ctr_req = req; + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + drbg_skcipher_cb, drbg); + + alignmask = crypto_skcipher_alignmask(sk_tfm); + drbg->ctr_null_value_buf = kzalloc(DRBG_CTR_NULL_LEN + alignmask, + GFP_KERNEL); + if (!drbg->ctr_null_value_buf) { + drbg_fini_sym_kernel(drbg); + return -ENOMEM; + } + drbg->ctr_null_value = (u8 *)PTR_ALIGN(drbg->ctr_null_value_buf, + alignmask + 1); + return 0; } @@ -1680,6 +1728,43 @@ static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, crypto_cipher_encrypt_one(tfm, outval, in->buf); return 0; } + +static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *outbuf, u32 outlen) +{ + struct scatterlist sg_in; + + sg_init_one(&sg_in, drbg->ctr_null_value, DRBG_CTR_NULL_LEN); + + while (outlen) { + u32 cryptlen = min_t(u32, outlen, DRBG_CTR_NULL_LEN); + struct scatterlist sg_out; + int ret; + + sg_init_one(&sg_out, outbuf, cryptlen); + skcipher_request_set_crypt(drbg->ctr_req, &sg_in, &sg_out, + cryptlen, drbg->V); + ret = crypto_skcipher_encrypt(drbg->ctr_req); + switch (ret) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &drbg->ctr_completion); + if (!ret && !drbg->ctr_async_err) { + reinit_completion(&drbg->ctr_completion); + break; + } + default: + return ret; + } + init_completion(&drbg->ctr_completion); + + outlen -= cryptlen; + } + + return 0; +} #endif /* CONFIG_CRYPTO_DRBG_CTR */ /*************************************************************** diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index d961b2b16f559..b2fe15d1cebad 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,14 @@ struct drbg_state { /* some memory the DRBG can use for its operation */ unsigned char *scratchpad; void *priv_data; /* Cipher handle */ + + struct crypto_skcipher *ctr_handle; /* CTR mode cipher handle */ + struct skcipher_request *ctr_req; /* CTR mode request handle */ + __u8 *ctr_null_value_buf; /* CTR mode unaligned buffer */ + __u8 *ctr_null_value; /* CTR mode aligned zero buf */ + struct completion ctr_completion; /* CTR mode async handler */ + int ctr_async_err; /* CTR mode async error */ + bool seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ struct work_struct seed_work; /* asynchronous seeding support */ From 3cfc3b97211238ffc1a7885ebe62f899180fe043 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 14 Jun 2016 07:35:13 +0200 Subject: [PATCH 048/180] crypto: drbg - use aligned buffers Hardware cipher implementation may require aligned buffers. All buffers that potentially are processed with a cipher are now aligned. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 44 +++++++++++++++++++++++-------------------- include/crypto/drbg.h | 3 +++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 4ee1a9c794200..8ac3ea11d4372 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1139,11 +1139,11 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) if (!drbg) return; kzfree(drbg->V); - drbg->V = NULL; + drbg->Vbuf = NULL; kzfree(drbg->C); - drbg->C = NULL; - kzfree(drbg->scratchpad); - drbg->scratchpad = NULL; + drbg->Cbuf = NULL; + kzfree(drbg->scratchpadbuf); + drbg->scratchpadbuf = NULL; drbg->reseed_ctr = 0; drbg->d_ops = NULL; drbg->core = NULL; @@ -1179,12 +1179,18 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) goto err; } - drbg->V = kmalloc(drbg_statelen(drbg), GFP_KERNEL); - if (!drbg->V) - goto err; - drbg->C = kmalloc(drbg_statelen(drbg), GFP_KERNEL); - if (!drbg->C) + ret = drbg->d_ops->crypto_init(drbg); + if (ret < 0) goto err; + + drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); + if (!drbg->Vbuf) + goto fini; + drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1); + drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); + if (!drbg->Cbuf) + goto fini; + drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1); /* scratchpad is only generated for CTR and Hash */ if (drbg->core->flags & DRBG_HMAC) sb_size = 0; @@ -1198,13 +1204,16 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) sb_size = drbg_statelen(drbg) + drbg_blocklen(drbg); if (0 < sb_size) { - drbg->scratchpad = kzalloc(sb_size, GFP_KERNEL); - if (!drbg->scratchpad) - goto err; + drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL); + if (!drbg->scratchpadbuf) + goto fini; + drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1); } return 0; +fini: + drbg->d_ops->crypto_fini(drbg); err: drbg_dealloc_state(drbg); return ret; @@ -1472,10 +1481,6 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, if (ret) goto unlock; - ret = -EFAULT; - if (drbg->d_ops->crypto_init(drbg)) - goto err; - ret = drbg_prepare_hrng(drbg); if (ret) goto free_everything; @@ -1499,8 +1504,6 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, mutex_unlock(&drbg->drbg_mutex); return ret; -err: - drbg_dealloc_state(drbg); unlock: mutex_unlock(&drbg->drbg_mutex); return ret; @@ -1585,7 +1588,8 @@ static int drbg_init_hash_kernel(struct drbg_state *drbg) sdesc->shash.tfm = tfm; sdesc->shash.flags = 0; drbg->priv_data = sdesc; - return 0; + + return crypto_shash_alignmask(tfm); } static int drbg_fini_hash_kernel(struct drbg_state *drbg) @@ -1705,7 +1709,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg) drbg->ctr_null_value = (u8 *)PTR_ALIGN(drbg->ctr_null_value_buf, alignmask + 1); - return 0; + return alignmask; } static void drbg_kcapi_symsetkey(struct drbg_state *drbg, diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index b2fe15d1cebad..61580b19f9f6e 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -108,13 +108,16 @@ struct drbg_test_data { struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ + unsigned char *Vbuf; /* hash: static value 10.1.1.1 1b) hmac / ctr: key */ unsigned char *C; + unsigned char *Cbuf; /* Number of RNG requests since last reseed -- 10.1.1.1 1c) */ size_t reseed_ctr; size_t reseed_threshold; /* some memory the DRBG can use for its operation */ unsigned char *scratchpad; + unsigned char *scratchpadbuf; void *priv_data; /* Cipher handle */ struct crypto_skcipher *ctr_handle; /* CTR mode cipher handle */ From a07203fbfcf146b737781ee4658ed198956036ca Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 14 Jun 2016 07:35:37 +0200 Subject: [PATCH 049/180] crypto: drbg - use full CTR AES for update The CTR DRBG update function performs a full CTR AES operation including the XOR with "plaintext" data. Hence, remove the XOR from the code and use the CTR mode to do the XOR. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 8ac3ea11d4372..8ceb71699dea8 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -258,7 +258,10 @@ static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, const struct drbg_string *in); static int drbg_init_sym_kernel(struct drbg_state *drbg); static int drbg_fini_sym_kernel(struct drbg_state *drbg); -static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *outbuf, u32 outlen); +static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, + u8 *inbuf, u32 inbuflen, + u8 *outbuf, u32 outlen); +#define DRBG_CTR_NULL_LEN 128 /* BCC function for CTR DRBG as defined in 10.4.3 */ static int drbg_ctr_bcc(struct drbg_state *drbg, @@ -481,8 +484,6 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, unsigned char *temp = drbg->scratchpad; unsigned char *df_data = drbg->scratchpad + drbg_statelen(drbg) + drbg_blocklen(drbg); - unsigned char *temp_p, *df_data_p; /* pointer to iterate over buffers */ - unsigned int len = 0; if (3 > reseed) memset(df_data, 0, drbg_statelen(drbg)); @@ -510,18 +511,11 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, goto out; } - ret = drbg_kcapi_sym_ctr(drbg, temp, drbg_statelen(drbg)); + ret = drbg_kcapi_sym_ctr(drbg, df_data, drbg_statelen(drbg), + temp, drbg_statelen(drbg)); if (ret) return ret; - /* 10.2.1.2 step 4 */ - temp_p = temp; - df_data_p = df_data; - for (len = 0; len < drbg_statelen(drbg); len++) { - *temp_p ^= *df_data_p; - df_data_p++; temp_p++; - } - /* 10.2.1.2 step 5 */ memcpy(drbg->C, temp, drbg_keylen(drbg)); ret = crypto_skcipher_setkey(drbg->ctr_handle, drbg->C, @@ -561,7 +555,8 @@ static int drbg_ctr_generate(struct drbg_state *drbg, } /* 10.2.1.5.2 step 4.1 */ - ret = drbg_kcapi_sym_ctr(drbg, buf, len); + ret = drbg_kcapi_sym_ctr(drbg, drbg->ctr_null_value, DRBG_CTR_NULL_LEN, + buf, len); if (ret) return ret; @@ -1657,7 +1652,6 @@ static void drbg_skcipher_cb(struct crypto_async_request *req, int error) complete(&drbg->ctr_completion); } -#define DRBG_CTR_NULL_LEN 128 static int drbg_init_sym_kernel(struct drbg_state *drbg) { struct crypto_cipher *tfm; @@ -1733,14 +1727,16 @@ static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, return 0; } -static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *outbuf, u32 outlen) +static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, + u8 *inbuf, u32 inlen, + u8 *outbuf, u32 outlen) { struct scatterlist sg_in; - sg_init_one(&sg_in, drbg->ctr_null_value, DRBG_CTR_NULL_LEN); + sg_init_one(&sg_in, inbuf, inlen); while (outlen) { - u32 cryptlen = min_t(u32, outlen, DRBG_CTR_NULL_LEN); + u32 cryptlen = min_t(u32, inlen, outlen); struct scatterlist sg_out; int ret; From 103eb3f7bfb4fce0e299afbf50fef8ffa8d9d38c Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 14 Jun 2016 07:36:06 +0200 Subject: [PATCH 050/180] crypto: drbg - avoid duplicate maintenance of key The TFM object maintains the key for the CTR DRBG. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 8ceb71699dea8..ded86385ab6e3 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -517,8 +517,7 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, return ret; /* 10.2.1.2 step 5 */ - memcpy(drbg->C, temp, drbg_keylen(drbg)); - ret = crypto_skcipher_setkey(drbg->ctr_handle, drbg->C, + ret = crypto_skcipher_setkey(drbg->ctr_handle, temp, drbg_keylen(drbg)); if (ret) goto out; From 5a7de97309f5af4458b1a25a2a529a1a893c5269 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 14 Jun 2016 16:14:58 +0300 Subject: [PATCH 051/180] crypto: rsa - return raw integers for the ASN.1 parser Return the raw key with no other processing so that the caller can copy it or MPI parse it, etc. The scope is to have only one ANS.1 parser for all RSA implementations. Update the RSA software implementation so that it does the MPI conversion on top. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- crypto/rsa.c | 105 ++++++++++++++++++++++++-------- crypto/rsa_helper.c | 111 ++++++++++++---------------------- include/crypto/internal/rsa.h | 22 +++++-- 3 files changed, 135 insertions(+), 103 deletions(-) diff --git a/crypto/rsa.c b/crypto/rsa.c index 77d737f521472..dc692d43b6662 100644 --- a/crypto/rsa.c +++ b/crypto/rsa.c @@ -10,16 +10,23 @@ */ #include +#include #include #include #include #include +struct rsa_mpi_key { + MPI n; + MPI e; + MPI d; +}; + /* * RSAEP function [RFC3447 sec 5.1.1] * c = m^e mod n; */ -static int _rsa_enc(const struct rsa_key *key, MPI c, MPI m) +static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m) { /* (1) Validate 0 <= m < n */ if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0) @@ -33,7 +40,7 @@ static int _rsa_enc(const struct rsa_key *key, MPI c, MPI m) * RSADP function [RFC3447 sec 5.1.2] * m = c^d mod n; */ -static int _rsa_dec(const struct rsa_key *key, MPI m, MPI c) +static int _rsa_dec(const struct rsa_mpi_key *key, MPI m, MPI c) { /* (1) Validate 0 <= c < n */ if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0) @@ -47,7 +54,7 @@ static int _rsa_dec(const struct rsa_key *key, MPI m, MPI c) * RSASP1 function [RFC3447 sec 5.2.1] * s = m^d mod n */ -static int _rsa_sign(const struct rsa_key *key, MPI s, MPI m) +static int _rsa_sign(const struct rsa_mpi_key *key, MPI s, MPI m) { /* (1) Validate 0 <= m < n */ if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0) @@ -61,7 +68,7 @@ static int _rsa_sign(const struct rsa_key *key, MPI s, MPI m) * RSAVP1 function [RFC3447 sec 5.2.2] * m = s^e mod n; */ -static int _rsa_verify(const struct rsa_key *key, MPI m, MPI s) +static int _rsa_verify(const struct rsa_mpi_key *key, MPI m, MPI s) { /* (1) Validate 0 <= s < n */ if (mpi_cmp_ui(s, 0) < 0 || mpi_cmp(s, key->n) >= 0) @@ -71,7 +78,7 @@ static int _rsa_verify(const struct rsa_key *key, MPI m, MPI s) return mpi_powm(m, s, key->e, key->n); } -static inline struct rsa_key *rsa_get_key(struct crypto_akcipher *tfm) +static inline struct rsa_mpi_key *rsa_get_key(struct crypto_akcipher *tfm) { return akcipher_tfm_ctx(tfm); } @@ -79,7 +86,7 @@ static inline struct rsa_key *rsa_get_key(struct crypto_akcipher *tfm) static int rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI m, c = mpi_alloc(0); int ret = 0; int sign; @@ -118,7 +125,7 @@ static int rsa_enc(struct akcipher_request *req) static int rsa_dec(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI c, m = mpi_alloc(0); int ret = 0; int sign; @@ -156,7 +163,7 @@ static int rsa_dec(struct akcipher_request *req) static int rsa_sign(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI m, s = mpi_alloc(0); int ret = 0; int sign; @@ -195,7 +202,7 @@ static int rsa_sign(struct akcipher_request *req) static int rsa_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI s, m = mpi_alloc(0); int ret = 0; int sign; @@ -233,6 +240,16 @@ static int rsa_verify(struct akcipher_request *req) return ret; } +static void rsa_free_mpi_key(struct rsa_mpi_key *key) +{ + mpi_free(key->d); + mpi_free(key->e); + mpi_free(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; +} + static int rsa_check_key_length(unsigned int len) { switch (len) { @@ -251,49 +268,87 @@ static int rsa_check_key_length(unsigned int len) static int rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *mpi_key = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; int ret; - ret = rsa_parse_pub_key(pkey, key, keylen); + /* Free the old MPI key if any */ + rsa_free_mpi_key(mpi_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); if (ret) return ret; - if (rsa_check_key_length(mpi_get_size(pkey->n) << 3)) { - rsa_free_key(pkey); - ret = -EINVAL; + mpi_key->e = mpi_read_raw_data(raw_key.e, raw_key.e_sz); + if (!mpi_key->e) + goto err; + + mpi_key->n = mpi_read_raw_data(raw_key.n, raw_key.n_sz); + if (!mpi_key->n) + goto err; + + if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) { + rsa_free_mpi_key(mpi_key); + return -EINVAL; } - return ret; + + return 0; + +err: + rsa_free_mpi_key(mpi_key); + return -ENOMEM; } static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *mpi_key = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; int ret; - ret = rsa_parse_priv_key(pkey, key, keylen); + /* Free the old MPI key if any */ + rsa_free_mpi_key(mpi_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); if (ret) return ret; - if (rsa_check_key_length(mpi_get_size(pkey->n) << 3)) { - rsa_free_key(pkey); - ret = -EINVAL; + mpi_key->d = mpi_read_raw_data(raw_key.d, raw_key.d_sz); + if (!mpi_key->d) + goto err; + + mpi_key->e = mpi_read_raw_data(raw_key.e, raw_key.e_sz); + if (!mpi_key->e) + goto err; + + mpi_key->n = mpi_read_raw_data(raw_key.n, raw_key.n_sz); + if (!mpi_key->n) + goto err; + + if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) { + rsa_free_mpi_key(mpi_key); + return -EINVAL; } - return ret; + + return 0; + +err: + rsa_free_mpi_key(mpi_key); + return -ENOMEM; } static int rsa_max_size(struct crypto_akcipher *tfm) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm); return pkey->n ? mpi_get_size(pkey->n) : -EINVAL; } static void rsa_exit_tfm(struct crypto_akcipher *tfm) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm); - rsa_free_key(pkey); + rsa_free_mpi_key(pkey); } static struct akcipher_alg rsa = { @@ -310,7 +365,7 @@ static struct akcipher_alg rsa = { .cra_driver_name = "rsa-generic", .cra_priority = 100, .cra_module = THIS_MODULE, - .cra_ctxsize = sizeof(struct rsa_key), + .cra_ctxsize = sizeof(struct rsa_mpi_key), }, }; diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index d226f48d09076..583656af4fe28 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -22,20 +22,29 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct rsa_key *key = context; + const u8 *ptr = value; + size_t n_sz = vlen; - key->n = mpi_read_raw_data(value, vlen); - - if (!key->n) - return -ENOMEM; - - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (mpi_get_size(key->n) != 256 && - mpi_get_size(key->n) != 384)) { - pr_err("RSA: key size not allowed in FIPS mode\n"); - mpi_free(key->n); - key->n = NULL; + /* invalid key provided */ + if (!value || !vlen) return -EINVAL; + + if (fips_enabled) { + while (!*ptr && n_sz) { + ptr++; + n_sz--; + } + + /* In FIPS mode only allow key size 2K & 3K */ + if (n_sz != 256 && n_sz != 384) { + pr_err("RSA: key size not allowed in FIPS mode\n"); + return -EINVAL; + } } + + key->n = value; + key->n_sz = vlen; + return 0; } @@ -44,10 +53,12 @@ int rsa_get_e(void *context, size_t hdrlen, unsigned char tag, { struct rsa_key *key = context; - key->e = mpi_read_raw_data(value, vlen); + /* invalid key provided */ + if (!value || !key->n_sz || !vlen || vlen > key->n_sz) + return -EINVAL; - if (!key->e) - return -ENOMEM; + key->e = value; + key->e_sz = vlen; return 0; } @@ -57,46 +68,20 @@ int rsa_get_d(void *context, size_t hdrlen, unsigned char tag, { struct rsa_key *key = context; - key->d = mpi_read_raw_data(value, vlen); - - if (!key->d) - return -ENOMEM; - - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (mpi_get_size(key->d) != 256 && - mpi_get_size(key->d) != 384)) { - pr_err("RSA: key size not allowed in FIPS mode\n"); - mpi_free(key->d); - key->d = NULL; + /* invalid key provided */ + if (!value || !key->n_sz || !vlen || vlen > key->n_sz) return -EINVAL; - } - return 0; -} -static void free_mpis(struct rsa_key *key) -{ - mpi_free(key->n); - mpi_free(key->e); - mpi_free(key->d); - key->n = NULL; - key->e = NULL; - key->d = NULL; -} + key->d = value; + key->d_sz = vlen; -/** - * rsa_free_key() - frees rsa key allocated by rsa_parse_key() - * - * @rsa_key: struct rsa_key key representation - */ -void rsa_free_key(struct rsa_key *key) -{ - free_mpis(key); + return 0; } -EXPORT_SYMBOL_GPL(rsa_free_key); /** - * rsa_parse_pub_key() - extracts an rsa public key from BER encoded buffer - * and stores it in the provided struct rsa_key + * rsa_parse_pub_key() - decodes the BER encoded buffer and stores in the + * provided struct rsa_key, pointers to the raw key as is, + * so that the caller can copy it or MPI parse it, etc. * * @rsa_key: struct rsa_key key representation * @key: key in BER format @@ -107,23 +92,15 @@ EXPORT_SYMBOL_GPL(rsa_free_key); int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len) { - int ret; - - free_mpis(rsa_key); - ret = asn1_ber_decoder(&rsapubkey_decoder, rsa_key, key, key_len); - if (ret < 0) - goto error; - - return 0; -error: - free_mpis(rsa_key); - return ret; + return asn1_ber_decoder(&rsapubkey_decoder, rsa_key, key, key_len); } EXPORT_SYMBOL_GPL(rsa_parse_pub_key); /** - * rsa_parse_pub_key() - extracts an rsa private key from BER encoded buffer - * and stores it in the provided struct rsa_key + * rsa_parse_priv_key() - decodes the BER encoded buffer and stores in the + * provided struct rsa_key, pointers to the raw key + * as is, so that the caller can copy it or MPI parse it, + * etc. * * @rsa_key: struct rsa_key key representation * @key: key in BER format @@ -134,16 +111,6 @@ EXPORT_SYMBOL_GPL(rsa_parse_pub_key); int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len) { - int ret; - - free_mpis(rsa_key); - ret = asn1_ber_decoder(&rsaprivkey_decoder, rsa_key, key, key_len); - if (ret < 0) - goto error; - - return 0; -error: - free_mpis(rsa_key); - return ret; + return asn1_ber_decoder(&rsaprivkey_decoder, rsa_key, key, key_len); } EXPORT_SYMBOL_GPL(rsa_parse_priv_key); diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h index c7585bdecbc26..d6c042a2ee52a 100644 --- a/include/crypto/internal/rsa.h +++ b/include/crypto/internal/rsa.h @@ -12,12 +12,24 @@ */ #ifndef _RSA_HELPER_ #define _RSA_HELPER_ -#include +#include +/** + * rsa_key - RSA key structure + * @n : RSA modulus raw byte stream + * @e : RSA public exponent raw byte stream + * @d : RSA private exponent raw byte stream + * @n_sz : length in bytes of RSA modulus n + * @e_sz : length in bytes of RSA public exponent + * @d_sz : length in bytes of RSA private exponent + */ struct rsa_key { - MPI n; - MPI e; - MPI d; + const u8 *n; + const u8 *e; + const u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; }; int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, @@ -26,7 +38,5 @@ int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len); -void rsa_free_key(struct rsa_key *rsa_key); - extern struct crypto_template rsa_pkcs1pad_tmpl; #endif From 88f1d316b9d0a213a9590fb5da6a72c0ec7012de Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 15 Jun 2016 19:13:25 +0800 Subject: [PATCH 052/180] crypto: drbg - fix semicolon.cocci warnings crypto/drbg.c:1637:39-40: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci CC: Stephan Mueller Signed-off-by: Fengguang Wu Acked-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index ded86385ab6e3..8b39f509f0a6e 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1632,7 +1632,7 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg) drbg->ctr_handle = NULL; if (drbg->ctr_req) - skcipher_request_free(drbg->ctr_req);; + skcipher_request_free(drbg->ctr_req); drbg->ctr_req = NULL; kfree(drbg->ctr_null_value_buf); From b30bdfa86431afbafe15284a3ad5ac19b49b88e3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 15 Jun 2016 22:27:05 +0800 Subject: [PATCH 053/180] crypto: gcm - Filter out async ghash if necessary As it is if you ask for a sync gcm you may actually end up with an async one because it does not filter out async implementations of ghash. This patch fixes this by adding the necessary filter when looking for ghash. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu --- crypto/gcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index bec329b3de8d7..d9ea5f9c05741 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type, CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, + algt->mask)); if (IS_ERR(ghash_alg)) return PTR_ERR(ghash_alg); From a6ed42dac49bfd6cb12e2dbffcee1c6d0854bd52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 16 Jun 2016 11:05:46 +0200 Subject: [PATCH 054/180] crypto: caam - fix misspelled upper_32_bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An endianess fix mistakenly used higher_32_bits() instead of upper_32_bits(), and that doesn't exist: drivers/crypto/caam/desc_constr.h: In function 'append_ptr': drivers/crypto/caam/desc_constr.h:84:75: error: implicit declaration of function 'higher_32_bits' [-Werror=implicit-function-declaration] *offset = cpu_to_caam_dma(ptr); Signed-off-by: Arnd Bergmann Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 8c766cf9202c1..b3c5016f64589 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -183,10 +183,10 @@ static inline u64 rd_reg64(void __iomem *reg) #ifdef CONFIG_SOC_IMX7D #define cpu_to_caam_dma(value) \ (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ - (u64)cpu_to_caam32(higher_32_bits(value))) + (u64)cpu_to_caam32(upper_32_bits(value))) #define caam_dma_to_cpu(value) \ (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ - (u64)caam32_to_cpu(higher_32_bits(value))) + (u64)caam32_to_cpu(upper_32_bits(value))) #else #define cpu_to_caam_dma(value) cpu_to_caam64(value) #define caam_dma_to_cpu(value) caam64_to_cpu(value) From 6999d504d4be3ddacd7a01ae961886a66b6a53d9 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 16 Jun 2016 16:32:55 +0300 Subject: [PATCH 055/180] crypto: caam - replace deprecated EXTRA_CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EXTRA_CFLAGS is still supported but its usage is deprecated. Signed-off-by: Tudor Ambarus Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 550758a333e7c..3904700ef1102 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -2,7 +2,7 @@ # Makefile for the CAAM backend and dependent components # ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y) - EXTRA_CFLAGS := -DDEBUG + ccflags-y := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o From 53964b9ee63b7075931b8df85307c449da564b50 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 17 Jun 2016 10:30:35 +0530 Subject: [PATCH 056/180] crypto: sha3 - Add SHA-3 hash algorithm This patch adds the implementation of SHA3 algorithm in software and it's based on original implementation pushed in patch https://lwn.net/Articles/518415/ with additional changes to match the padding rules specified in SHA-3 specification. Signed-off-by: Jeff Garzik Signed-off-by: Raveendra Padasalagi Signed-off-by: Herbert Xu --- crypto/Kconfig | 10 ++ crypto/Makefile | 1 + crypto/sha3_generic.c | 300 ++++++++++++++++++++++++++++++++++++++++++ include/crypto/sha3.h | 29 ++++ 4 files changed, 340 insertions(+) create mode 100644 crypto/sha3_generic.c create mode 100644 include/crypto/sha3.h diff --git a/crypto/Kconfig b/crypto/Kconfig index c903f1832f2c7..6881d1a5f8591 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -750,6 +750,16 @@ config CRYPTO_SHA512_SPARC64 SHA-512 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA3 + tristate "SHA3 digest algorithm" + select CRYPTO_HASH + help + SHA-3 secure hash standard (DFIPS 202). It's based on + cryptographic sponge function family called Keccak. + + References: + http://keccak.noekeon.org/ + config CRYPTO_TGR192 tristate "Tiger digest algorithms" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 4f4ef7eaae3f2..0b82c47537435 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o +obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c new file mode 100644 index 0000000000000..62264397a2d28 --- /dev/null +++ b/crypto/sha3_generic.c @@ -0,0 +1,300 @@ +/* + * Cryptographic API. + * + * SHA-3, as specified in + * http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf + * + * SHA-3 code by Jeff Garzik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option)• + * any later version. + * + */ +#include +#include +#include +#include +#include +#include + +#define KECCAK_ROUNDS 24 + +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) + +static const u64 keccakf_rndc[24] = { + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 +}; + +static const int keccakf_rotc[24] = { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 +}; + +static const int keccakf_piln[24] = { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 +}; + +/* update the state with given number of rounds */ + +static void keccakf(u64 st[25]) +{ + int i, j, round; + u64 t, bc[5]; + + for (round = 0; round < KECCAK_ROUNDS; round++) { + + /* Theta */ + for (i = 0; i < 5; i++) + bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] + ^ st[i + 20]; + + for (i = 0; i < 5; i++) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + for (j = 0; j < 25; j += 5) + st[j + i] ^= t; + } + + /* Rho Pi */ + t = st[1]; + for (i = 0; i < 24; i++) { + j = keccakf_piln[i]; + bc[0] = st[j]; + st[j] = ROTL64(t, keccakf_rotc[i]); + t = bc[0]; + } + + /* Chi */ + for (j = 0; j < 25; j += 5) { + for (i = 0; i < 5; i++) + bc[i] = st[j + i]; + for (i = 0; i < 5; i++) + st[j + i] ^= (~bc[(i + 1) % 5]) & + bc[(i + 2) % 5]; + } + + /* Iota */ + st[0] ^= keccakf_rndc[round]; + } +} + +static void sha3_init(struct sha3_state *sctx, unsigned int digest_sz) +{ + memset(sctx, 0, sizeof(*sctx)); + sctx->md_len = digest_sz; + sctx->rsiz = 200 - 2 * digest_sz; + sctx->rsizw = sctx->rsiz / 8; +} + +static int sha3_224_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_224_DIGEST_SIZE); + return 0; +} + +static int sha3_256_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_256_DIGEST_SIZE); + return 0; +} + +static int sha3_384_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_384_DIGEST_SIZE); + return 0; +} + +static int sha3_512_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_512_DIGEST_SIZE); + return 0; +} + +static int sha3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + unsigned int done; + const u8 *src; + + done = 0; + src = data; + + if ((sctx->partial + len) > (sctx->rsiz - 1)) { + if (sctx->partial) { + done = -sctx->partial; + memcpy(sctx->buf + sctx->partial, data, + done + sctx->rsiz); + src = sctx->buf; + } + + do { + unsigned int i; + + for (i = 0; i < sctx->rsizw; i++) + sctx->st[i] ^= ((u64 *) src)[i]; + keccakf(sctx->st); + + done += sctx->rsiz; + src = data + done; + } while (done + (sctx->rsiz - 1) < len); + + sctx->partial = 0; + } + memcpy(sctx->buf + sctx->partial, src, len - done); + sctx->partial += (len - done); + + return 0; +} + +static int sha3_final(struct shash_desc *desc, u8 *out) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + unsigned int i, inlen = sctx->partial; + + sctx->buf[inlen++] = 0x06; + memset(sctx->buf + inlen, 0, sctx->rsiz - inlen); + sctx->buf[sctx->rsiz - 1] |= 0x80; + + for (i = 0; i < sctx->rsizw; i++) + sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + + keccakf(sctx->st); + + for (i = 0; i < sctx->rsizw; i++) + sctx->st[i] = cpu_to_le64(sctx->st[i]); + + memcpy(out, sctx->st, sctx->md_len); + + memset(sctx, 0, sizeof(*sctx)); + return 0; +} + +static struct shash_alg sha3_224 = { + .digestsize = SHA3_224_DIGEST_SIZE, + .init = sha3_224_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-224", + .cra_driver_name = "sha3-224-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha3_256 = { + .digestsize = SHA3_256_DIGEST_SIZE, + .init = sha3_256_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-256", + .cra_driver_name = "sha3-256-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha3_384 = { + .digestsize = SHA3_384_DIGEST_SIZE, + .init = sha3_384_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-384", + .cra_driver_name = "sha3-384-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha3_512 = { + .digestsize = SHA3_512_DIGEST_SIZE, + .init = sha3_512_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-512", + .cra_driver_name = "sha3-512-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha3_generic_mod_init(void) +{ + int ret; + + ret = crypto_register_shash(&sha3_224); + if (ret < 0) + goto err_out; + ret = crypto_register_shash(&sha3_256); + if (ret < 0) + goto err_out_224; + ret = crypto_register_shash(&sha3_384); + if (ret < 0) + goto err_out_256; + ret = crypto_register_shash(&sha3_512); + if (ret < 0) + goto err_out_384; + + return 0; + +err_out_384: + crypto_unregister_shash(&sha3_384); +err_out_256: + crypto_unregister_shash(&sha3_256); +err_out_224: + crypto_unregister_shash(&sha3_224); +err_out: + return ret; +} + +static void __exit sha3_generic_mod_fini(void) +{ + crypto_unregister_shash(&sha3_224); + crypto_unregister_shash(&sha3_256); + crypto_unregister_shash(&sha3_384); + crypto_unregister_shash(&sha3_512); +} + +module_init(sha3_generic_mod_init); +module_exit(sha3_generic_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-3 Secure Hash Algorithm"); + +MODULE_ALIAS_CRYPTO("sha3-224"); +MODULE_ALIAS_CRYPTO("sha3-224-generic"); +MODULE_ALIAS_CRYPTO("sha3-256"); +MODULE_ALIAS_CRYPTO("sha3-256-generic"); +MODULE_ALIAS_CRYPTO("sha3-384"); +MODULE_ALIAS_CRYPTO("sha3-384-generic"); +MODULE_ALIAS_CRYPTO("sha3-512"); +MODULE_ALIAS_CRYPTO("sha3-512-generic"); diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h new file mode 100644 index 0000000000000..f4c9f68f5ffeb --- /dev/null +++ b/include/crypto/sha3.h @@ -0,0 +1,29 @@ +/* + * Common values for SHA-3 algorithms + */ +#ifndef __CRYPTO_SHA3_H__ +#define __CRYPTO_SHA3_H__ + +#define SHA3_224_DIGEST_SIZE (224 / 8) +#define SHA3_224_BLOCK_SIZE (200 - 2 * SHA3_224_DIGEST_SIZE) + +#define SHA3_256_DIGEST_SIZE (256 / 8) +#define SHA3_256_BLOCK_SIZE (200 - 2 * SHA3_256_DIGEST_SIZE) + +#define SHA3_384_DIGEST_SIZE (384 / 8) +#define SHA3_384_BLOCK_SIZE (200 - 2 * SHA3_384_DIGEST_SIZE) + +#define SHA3_512_DIGEST_SIZE (512 / 8) +#define SHA3_512_BLOCK_SIZE (200 - 2 * SHA3_512_DIGEST_SIZE) + +struct sha3_state { + u64 st[25]; + unsigned int md_len; + unsigned int rsiz; + unsigned int rsizw; + + unsigned int partial; + u8 buf[SHA3_224_BLOCK_SIZE]; +}; + +#endif From 79cc6ab8947bd238b64afddc56ed84ee65f012ef Mon Sep 17 00:00:00 2001 From: raveendra padasalagi Date: Fri, 17 Jun 2016 10:30:36 +0530 Subject: [PATCH 057/180] crypto: sha3 - Add SHA-3 Test's in tcrypt Added support for SHA-3 algorithm test's in tcrypt module and related test vectors. Signed-off-by: Raveendra Padasalagi Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 53 +++++++++++++++++++- crypto/testmgr.c | 40 +++++++++++++++ crypto/testmgr.h | 125 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 1 deletion(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 579dce0714638..4675459e82da7 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -72,7 +72,8 @@ static char *check[] = { "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320", - "lzo", "cts", "zlib", NULL + "lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512", + NULL }; struct tcrypt_result { @@ -1284,6 +1285,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) ret += tcrypt_test("crct10dif"); break; + case 48: + ret += tcrypt_test("sha3-224"); + break; + + case 49: + ret += tcrypt_test("sha3-256"); + break; + + case 50: + ret += tcrypt_test("sha3-384"); + break; + + case 51: + ret += tcrypt_test("sha3-512"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1691,6 +1708,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_hash_speed("poly1305", sec, poly1305_speed_template); if (mode > 300 && mode < 400) break; + case 322: + test_hash_speed("sha3-224", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + + case 323: + test_hash_speed("sha3-256", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + + case 324: + test_hash_speed("sha3-384", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + + case 325: + test_hash_speed("sha3-512", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + case 399: break; @@ -1770,6 +1803,24 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_ahash_speed("rmd320", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; + case 418: + test_ahash_speed("sha3-224", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 419: + test_ahash_speed("sha3-256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 420: + test_ahash_speed("sha3-384", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + + case 421: + test_ahash_speed("sha3-512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 499: break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c727fb0cb0215..b773a563809bd 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3658,6 +3658,46 @@ static const struct alg_test_desc alg_test_descs[] = { .count = SHA256_TEST_VECTORS } } + }, { + .alg = "sha3-224", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_224_tv_template, + .count = SHA3_224_TEST_VECTORS + } + } + }, { + .alg = "sha3-256", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_256_tv_template, + .count = SHA3_256_TEST_VECTORS + } + } + }, { + .alg = "sha3-384", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_384_tv_template, + .count = SHA3_384_TEST_VECTORS + } + } + }, { + .alg = "sha3-512", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_512_tv_template, + .count = SHA3_512_TEST_VECTORS + } + } }, { .alg = "sha384", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 487ec880e889c..b70e3c92bedd8 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -376,6 +376,131 @@ static struct hash_testvec md4_tv_template [] = { }, }; +#define SHA3_224_TEST_VECTORS 3 +static struct hash_testvec sha3_224_tv_template[] = { + { + .plaintext = "", + .digest = "\x6b\x4e\x03\x42\x36\x67\xdb\xb7" + "\x3b\x6e\x15\x45\x4f\x0e\xb1\xab" + "\xd4\x59\x7f\x9a\x1b\x07\x8e\x3f" + "\x5b\x5a\x6b\xc7", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x9e\x86\xff\x69\x55\x7c\xa9\x5f" + "\x40\x5f\x08\x12\x69\x68\x5b\x38" + "\xe3\xa8\x19\xb3\x09\xee\x94\x2f" + "\x48\x2b\x6a\x8b", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x8a\x24\x10\x8b\x15\x4a\xda\x21" + "\xc9\xfd\x55\x74\x49\x44\x79\xba" + "\x5c\x7e\x7a\xb7\x6e\xf2\x64\xea" + "\xd0\xfc\xce\x33", + }, +}; + +#define SHA3_256_TEST_VECTORS 3 +static struct hash_testvec sha3_256_tv_template[] = { + { + .plaintext = "", + .digest = "\xa7\xff\xc6\xf8\xbf\x1e\xd7\x66" + "\x51\xc1\x47\x56\xa0\x61\xd6\x62" + "\xf5\x80\xff\x4d\xe4\x3b\x49\xfa" + "\x82\xd8\x0a\x4b\x80\xf8\x43\x4a", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x80\x08\x4b\xf2\xfb\xa0\x24\x75" + "\x72\x6f\xeb\x2c\xab\x2d\x82\x15" + "\xea\xb1\x4b\xc6\xbd\xd8\xbf\xb2" + "\xc8\x15\x12\x57\x03\x2e\xcd\x8b", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x41\xc0\xdb\xa2\xa9\xd6\x24\x08" + "\x49\x10\x03\x76\xa8\x23\x5e\x2c" + "\x82\xe1\xb9\x99\x8a\x99\x9e\x21" + "\xdb\x32\xdd\x97\x49\x6d\x33\x76", + }, +}; + + +#define SHA3_384_TEST_VECTORS 3 +static struct hash_testvec sha3_384_tv_template[] = { + { + .plaintext = "", + .digest = "\x0c\x63\xa7\x5b\x84\x5e\x4f\x7d" + "\x01\x10\x7d\x85\x2e\x4c\x24\x85" + "\xc5\x1a\x50\xaa\xaa\x94\xfc\x61" + "\x99\x5e\x71\xbb\xee\x98\x3a\x2a" + "\xc3\x71\x38\x31\x26\x4a\xdb\x47" + "\xfb\x6b\xd1\xe0\x58\xd5\xf0\x04", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x18\x15\xf7\x74\xf3\x20\x49\x1b" + "\x48\x56\x9e\xfe\xc7\x94\xd2\x49" + "\xee\xb5\x9a\xae\x46\xd2\x2b\xf7" + "\x7d\xaf\xe2\x5c\x5e\xdc\x28\xd7" + "\xea\x44\xf9\x3e\xe1\x23\x4a\xa8" + "\x8f\x61\xc9\x19\x12\xa4\xcc\xd9", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x99\x1c\x66\x57\x55\xeb\x3a\x4b" + "\x6b\xbd\xfb\x75\xc7\x8a\x49\x2e" + "\x8c\x56\xa2\x2c\x5c\x4d\x7e\x42" + "\x9b\xfd\xbc\x32\xb9\xd4\xad\x5a" + "\xa0\x4a\x1f\x07\x6e\x62\xfe\xa1" + "\x9e\xef\x51\xac\xd0\x65\x7c\x22", + }, +}; + + +#define SHA3_512_TEST_VECTORS 3 +static struct hash_testvec sha3_512_tv_template[] = { + { + .plaintext = "", + .digest = "\xa6\x9f\x73\xcc\xa2\x3a\x9a\xc5" + "\xc8\xb5\x67\xdc\x18\x5a\x75\x6e" + "\x97\xc9\x82\x16\x4f\xe2\x58\x59" + "\xe0\xd1\xdc\xc1\x47\x5c\x80\xa6" + "\x15\xb2\x12\x3a\xf1\xf5\xf9\x4c" + "\x11\xe3\xe9\x40\x2c\x3a\xc5\x58" + "\xf5\x00\x19\x9d\x95\xb6\xd3\xe3" + "\x01\x75\x85\x86\x28\x1d\xcd\x26", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x69\x7f\x2d\x85\x61\x72\xcb\x83" + "\x09\xd6\xb8\xb9\x7d\xac\x4d\xe3" + "\x44\xb5\x49\xd4\xde\xe6\x1e\xdf" + "\xb4\x96\x2d\x86\x98\xb7\xfa\x80" + "\x3f\x4f\x93\xff\x24\x39\x35\x86" + "\xe2\x8b\x5b\x95\x7a\xc3\xd1\xd3" + "\x69\x42\x0c\xe5\x33\x32\x71\x2f" + "\x99\x7b\xd3\x36\xd0\x9a\xb0\x2a", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x04\xa3\x71\xe8\x4e\xcf\xb5\xb8" + "\xb7\x7c\xb4\x86\x10\xfc\xa8\x18" + "\x2d\xd4\x57\xce\x6f\x32\x6a\x0f" + "\xd3\xd7\xec\x2f\x1e\x91\x63\x6d" + "\xee\x69\x1f\xbe\x0c\x98\x53\x02" + "\xba\x1b\x0d\x8d\xc7\x8c\x08\x63" + "\x46\xb5\x33\xb4\x9c\x03\x0d\x99" + "\xa2\x7d\xaf\x11\x39\xd6\xe7\x5e", + }, +}; + + /* * MD5 test vectors from RFC1321 */ From 01ac94580ac2f7076394212ca963da7409016796 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Jun 2016 12:16:19 +0300 Subject: [PATCH 058/180] crypto: drbg - fix an error code in drbg_init_sym_kernel() We accidentally return PTR_ERR(NULL) which is success but we should return -ENOMEM. Fixes: 355912852115 ('crypto: drbg - use CTR AES instead of ECB AES') Signed-off-by: Dan Carpenter Acked-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 8b39f509f0a6e..f752da3a7c757 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1686,7 +1686,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg) if (!req) { pr_info("DRBG: could not allocate request queue\n"); drbg_fini_sym_kernel(drbg); - return PTR_ERR(req); + return -ENOMEM; } drbg->ctr_req = req; skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, From fd2efd93b6fcd981263477298cf1544b46683378 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 23 Jun 2016 18:06:02 +0800 Subject: [PATCH 059/180] Revert "crypto: user - no parsing of CRYPTO_MSG_GETALG" This patch commit eed1e1afd8d542d9644534c1b712599b5d680007 as it is only a workaround for the real bug and the proper fix has now been applied as 055ddaace03580455a7b7dbea8e93d62acee61fc ("crypto: user - re-add size check for CRYPTO_MSG_GETALG"). Signed-off-by: Herbert Xu --- crypto/crypto_user.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index f71960dea8822..43fe85f20d577 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -516,12 +516,10 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - if (type != (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE)) { - err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, - CRYPTOCFGA_MAX, crypto_policy); - if (err < 0) - return err; - } + err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, + crypto_policy); + if (err < 0) + return err; if (link->doit == NULL) return -EINVAL; From 7ea0da1d75e0d03f9ec484d2038e3caf06024b17 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2016 13:19:17 +0800 Subject: [PATCH 060/180] crypto: chacha20-simd - Use generic code for small requests On 16-byte requests the optimised version is actually slower than the generic code, so we should simply use that instead. Signed-off-by: Herbert Xu Cheers, --- arch/x86/crypto/chacha20_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 2d5c2e0bd939b..f910d1d449f00 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -70,7 +70,7 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, struct blkcipher_walk walk; int err; - if (!may_use_simd()) + if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) return crypto_chacha20_crypt(desc, dst, src, nbytes); state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); From e26df73f80290efe5c772d94ac3a3cf2b077fe5b Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:31 +0200 Subject: [PATCH 061/180] crypto: marvell - Add a macro constant for the size of the crypto queue Adding a macro constant to be used for the size of the crypto queue, instead of using a numeric value directly. It will be easier to maintain in case we add more than one crypto queue of the same size. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index e8ef9fd24a166..47552c12e8c69 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -31,6 +31,9 @@ #include "cesa.h" +/* Limit of the crypto queue before reaching the backlog */ +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 50 + static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if overlaps with the mv_cesa driver)"); @@ -416,7 +419,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, 50); + crypto_init_queue(&cesa->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) From f62830886f6b8cb2f1c55e23cb5eab51c4b9ef2c Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:32 +0200 Subject: [PATCH 062/180] crypto: marvell - Check engine is not already running when enabling a req Add a BUG_ON() call when the driver tries to launch a crypto request while the engine is still processing the previous one. This replaces a silent system hang by a verbose kernel panic with the associated backtrace to let the user know that something went wrong in the CESA driver. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cipher.c | 2 ++ drivers/crypto/marvell/hash.c | 2 ++ drivers/crypto/marvell/tdma.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index dcf1fceb93364..8c1432e443388 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -106,6 +106,8 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 7a5058da91517..204c29358a7d3 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -237,6 +237,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 0ad8f1ecf1759..e416a7455d281 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -53,6 +53,8 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } From b99acf79a17bf402ea5759ad9d44eba3ed837a59 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:33 +0200 Subject: [PATCH 063/180] crypto: marvell - Fix wrong type check in dma functions So far, the way that the type of a TDMA operation was checked was wrong. We have to use the type mask in order to get the right part of the flag containing the type of the operation. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/tdma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index e416a7455d281..4d23bf95be9c8 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -64,8 +64,9 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) for (tdma = dreq->chain.first; tdma;) { struct mv_cesa_tdma_desc *old_tdma = tdma; + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK; - if (tdma->flags & CESA_TDMA_OP) + if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); @@ -90,7 +91,7 @@ void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) tdma->src = cpu_to_le32(tdma->src + engine->sram_dma); - if (tdma->flags & CESA_TDMA_OP) + if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP) mv_cesa_adjust_op(engine, tdma->op); } } From bac8e805a30dc2a29c3fdde9d822e59c75e710d7 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:34 +0200 Subject: [PATCH 064/180] crypto: marvell - Copy IV vectors by DMA transfers for acipher requests Add a TDMA descriptor at the end of the request for copying the output IV vector via a DMA transfer. This is a good way for offloading as much as processing as possible to the DMA and the crypto engine. This is also required for processing multiple cipher requests in chained mode, otherwise the content of the IV vector would be overwritten by the last processed request. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 4 ++++ drivers/crypto/marvell/cesa.h | 5 +++++ drivers/crypto/marvell/cipher.c | 31 ++++++++++++++++++++++--------- drivers/crypto/marvell/tdma.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 47552c12e8c69..9063f56d87250 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -312,6 +312,10 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; + dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); + if (!dma->iv_pool) + return -ENOMEM; + cesa->dma = dma; return 0; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 74071e45ada0d..685a627bd9ae0 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -275,6 +275,7 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 +#define CESA_TDMA_IV 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -390,6 +391,7 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; + struct dma_pool *iv_pool; }; /** @@ -790,6 +792,9 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags); + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 8c1432e443388..90a2a5cf05f6c 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -118,6 +118,7 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; struct mv_cesa_engine *engine = sreq->base.engine; size_t len; + unsigned int ivsize; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -127,6 +128,10 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, if (sreq->offset < req->nbytes) return -EINPROGRESS; + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + memcpy_fromio(req->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, ivsize); + return 0; } @@ -135,21 +140,20 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_tdma_req *dreq; + unsigned int ivsize; int ret; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma, status); - else - ret = mv_cesa_ablkcipher_std_process(ablkreq, status); + if (creq->req.base.type == CESA_STD_REQ) + return mv_cesa_ablkcipher_std_process(ablkreq, status); + ret = mv_cesa_dma_process(&creq->req.dma, status); if (ret) return ret; - memcpy_fromio(ablkreq->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, - crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq))); + dreq = &creq->req.dma; + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + memcpy_fromio(ablkreq->info, dreq->chain.last->data, ivsize); return 0; } @@ -302,6 +306,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; + unsigned int ivsize; dreq->base.type = CESA_DMA_REQ; dreq->chain.first = NULL; @@ -360,6 +365,14 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, } while (mv_cesa_ablkcipher_req_iter_next_op(&iter)); + /* Add output data for IV */ + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + ret = mv_cesa_dma_add_iv_op(&chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + + if (ret) + goto err_free_tdma; + dreq->chain = chain; return 0; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 4d23bf95be9c8..dc07335386067 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -69,6 +69,9 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); + else if (type == CESA_TDMA_IV) + dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, + le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -120,6 +123,32 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags) +{ + + struct mv_cesa_tdma_desc *tdma; + u8 *iv; + dma_addr_t dma_handle; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, flags, &dma_handle); + if (!iv) + return -ENOMEM; + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src = src; + tdma->dst = cpu_to_le32(dma_handle); + tdma->data = iv; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_IV; + return 0; +} + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, From 53da740fed302fbecd66d21cad1c4115b9aaab78 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:35 +0200 Subject: [PATCH 065/180] crypto: marvell - Move tdma chain out of mv_cesa_tdma_req and remove it Currently, the only way to access the tdma chain is to use the 'req' union from a mv_cesa_{ablkcipher,ahash}. This will soon become a problem if we want to handle the TDMA chaining vs standard/non-DMA processing in a generic way (with generic functions at the cesa.c level detecting whether the request should be queued at the DMA level or not). Hence the decision to move the chain field a the mv_cesa_req level at the expense of adding 2 void * fields to all request contexts (including non-DMA ones) and to remove the type completly. To limit the overhead, we get rid of the type field, which can now be deduced from the req->chain.first value. Once these changes are done the union is no longer needed, so remove it and move mv_cesa_ablkcipher_std_req and mv_cesa_req to mv_cesa_ablkcipher_req directly. There are also no needs to keep the 'base' field into the union of mv_cesa_ahash_req, so move it into the upper structure. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 3 +- drivers/crypto/marvell/cesa.h | 44 +++++++++-------------- drivers/crypto/marvell/cipher.c | 63 ++++++++++++++++---------------- drivers/crypto/marvell/hash.c | 64 +++++++++++++++------------------ drivers/crypto/marvell/tdma.c | 8 ++--- 5 files changed, 84 insertions(+), 98 deletions(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 9063f56d87250..f407bacdb0218 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -111,7 +111,8 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) return ret; } -int mv_cesa_queue_req(struct crypto_async_request *req) +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq) { int ret; int i; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 685a627bd9ae0..e67e3f17eb014 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -509,21 +509,11 @@ enum mv_cesa_req_type { /** * struct mv_cesa_req - CESA request - * @type: request type * @engine: engine associated with this request + * @chain: list of tdma descriptors associated with this request */ struct mv_cesa_req { - enum mv_cesa_req_type type; struct mv_cesa_engine *engine; -}; - -/** - * struct mv_cesa_tdma_req - CESA TDMA request - * @base: base information - * @chain: TDMA chain - */ -struct mv_cesa_tdma_req { - struct mv_cesa_req base; struct mv_cesa_tdma_chain chain; }; @@ -540,13 +530,11 @@ struct mv_cesa_sg_std_iter { /** * struct mv_cesa_ablkcipher_std_req - cipher standard request - * @base: base information * @op: operation context * @offset: current operation offset * @size: size of the crypto operation */ struct mv_cesa_ablkcipher_std_req { - struct mv_cesa_req base; struct mv_cesa_op_ctx op; unsigned int offset; unsigned int size; @@ -560,34 +548,27 @@ struct mv_cesa_ablkcipher_std_req { * @dst_nents: number of entries in the dest sg list */ struct mv_cesa_ablkcipher_req { - union { - struct mv_cesa_req base; - struct mv_cesa_tdma_req dma; - struct mv_cesa_ablkcipher_std_req std; - } req; + struct mv_cesa_req base; + struct mv_cesa_ablkcipher_std_req std; int src_nents; int dst_nents; }; /** * struct mv_cesa_ahash_std_req - standard hash request - * @base: base information * @offset: current operation offset */ struct mv_cesa_ahash_std_req { - struct mv_cesa_req base; unsigned int offset; }; /** * struct mv_cesa_ahash_dma_req - DMA hash request - * @base: base information * @padding: padding buffer * @padding_dma: DMA address of the padding buffer * @cache_dma: DMA address of the cache buffer */ struct mv_cesa_ahash_dma_req { - struct mv_cesa_tdma_req base; u8 *padding; dma_addr_t padding_dma; u8 *cache; @@ -606,8 +587,8 @@ struct mv_cesa_ahash_dma_req { * @state: hash state */ struct mv_cesa_ahash_req { + struct mv_cesa_req base; union { - struct mv_cesa_req base; struct mv_cesa_ahash_dma_req dma; struct mv_cesa_ahash_std_req std; } req; @@ -625,6 +606,12 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; +static inline enum mv_cesa_req_type +mv_cesa_req_get_type(struct mv_cesa_req *req) +{ + return req->chain.first ? CESA_DMA_REQ : CESA_STD_REQ; +} + static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg, u32 mask) { @@ -697,7 +684,8 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) CESA_SA_DESC_CFG_FIRST_FRAG; } -int mv_cesa_queue_req(struct crypto_async_request *req); +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq); /* * Helper function that indicates whether a crypto request needs to be @@ -767,9 +755,9 @@ static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) return iter->op_len; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq); +void mv_cesa_dma_step(struct mv_cesa_req *dreq); -static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, +static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, u32 status) { if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) @@ -781,10 +769,10 @@ static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, return 0; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq); static inline void mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 90a2a5cf05f6c..7699528957f48 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -70,22 +70,22 @@ mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_BIDIRECTIONAL); } - mv_cesa_dma_cleanup(&creq->req.dma); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_cleanup(req); } static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len = min_t(size_t, req->nbytes - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); @@ -115,8 +115,8 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, u32 status) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len; unsigned int ivsize; @@ -140,20 +140,19 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_tdma_req *dreq; + struct mv_cesa_req *basereq = &creq->base; unsigned int ivsize; int ret; - if (creq->req.base.type == CESA_STD_REQ) + if (mv_cesa_req_get_type(basereq) == CESA_STD_REQ) return mv_cesa_ablkcipher_std_process(ablkreq, status); - ret = mv_cesa_dma_process(&creq->req.dma, status); + ret = mv_cesa_dma_process(basereq, status); if (ret) return ret; - dreq = &creq->req.dma; ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); - memcpy_fromio(ablkreq->info, dreq->chain.last->data, ivsize); + memcpy_fromio(ablkreq->info, basereq->chain.last->data, ivsize); return 0; } @@ -163,8 +162,8 @@ static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ablkcipher_std_step(ablkreq); } @@ -173,17 +172,17 @@ static inline void mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static inline void mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; sreq->size = 0; sreq->offset = 0; @@ -196,9 +195,9 @@ static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_prepare(ablkreq); else mv_cesa_ablkcipher_std_prepare(ablkreq); @@ -301,16 +300,15 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ablkcipher_dma_iter iter; struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; unsigned int ivsize; - dreq->base.type = CESA_DMA_REQ; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (req->src != req->dst) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -373,12 +371,12 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, if (ret) goto err_free_tdma; - dreq->chain = chain; + basereq->chain = chain; return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); if (req->dst != req->src) dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, DMA_FROM_DEVICE); @@ -395,11 +393,13 @@ mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req, const struct mv_cesa_op_ctx *op_templ) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_req *basereq = &creq->base; - sreq->base.type = CESA_STD_REQ; sreq->op = *op_templ; sreq->skip_ctx = false; + basereq->chain.first = NULL; + basereq->chain.last = NULL; return 0; } @@ -441,6 +441,7 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, static int mv_cesa_des_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; @@ -453,7 +454,7 @@ static int mv_cesa_des_op(struct ablkcipher_request *req, if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); @@ -561,6 +562,7 @@ struct crypto_alg mv_cesa_cbc_des_alg = { static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; @@ -573,7 +575,7 @@ static int mv_cesa_des3_op(struct ablkcipher_request *req, if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); @@ -687,6 +689,7 @@ struct crypto_alg mv_cesa_cbc_des3_ede_alg = { static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret, i; u32 *key; @@ -715,7 +718,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 204c29358a7d3..03aaa39d6fd60 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -103,14 +103,14 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); mv_cesa_ahash_dma_free_cache(&creq->req.dma); - mv_cesa_dma_cleanup(&creq->req.dma.base); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_cleanup(req); } @@ -118,7 +118,7 @@ static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_last_cleanup(req); } @@ -157,7 +157,7 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; struct mv_cesa_op_ctx *op; unsigned int new_cache_ptr = 0; u32 frag_mode; @@ -256,16 +256,16 @@ static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma.base; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; sreq->offset = 0; mv_cesa_adjust_op(engine, &creq->op_tmpl); @@ -277,8 +277,8 @@ static void mv_cesa_ahash_step(struct crypto_async_request *req) struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma.base); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ahash_std_step(ahashreq); } @@ -287,12 +287,12 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->req.base.engine; + struct mv_cesa_engine *engine = creq->base.engine; unsigned int digsize; int ret, i; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma.base, status); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + ret = mv_cesa_dma_process(&creq->base, status); else ret = mv_cesa_ahash_std_process(ahashreq, status); @@ -338,9 +338,9 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, unsigned int digsize; int i; - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); @@ -555,15 +555,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; - struct mv_cesa_tdma_req *dreq = &ahashdreq->base; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; int ret; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -574,14 +573,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) } } - mv_cesa_tdma_desc_iter_init(&dreq->chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ahash_req_iter_init(&iter, req); /* * Add the cache (left-over data from a previous block) first. * This will never overflow the SRAM size. */ - ret = mv_cesa_ahash_dma_add_cache(&dreq->chain, &iter, creq, flags); + ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags); if (ret) goto err_free_tdma; @@ -592,7 +591,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * data. We intentionally do not add the final op block. */ while (true) { - ret = mv_cesa_dma_add_op_transfers(&dreq->chain, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) @@ -603,7 +602,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -621,10 +620,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * operation, which depends whether this is the final request. */ if (creq->last_req) - op = mv_cesa_ahash_dma_last_req(&dreq->chain, &iter, creq, + op = mv_cesa_ahash_dma_last_req(&basereq->chain, &iter, creq, frag_len, flags); else if (frag_len) - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { @@ -634,7 +633,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (op) { /* Add dummy desc to wait for crypto operation end */ - ret = mv_cesa_dma_add_dummy_end(&dreq->chain, flags); + ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) goto err_free_tdma; } @@ -648,7 +647,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); err: @@ -662,11 +661,6 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); int ret; - if (cesa_dev->caps->has_tdma) - creq->req.base.type = CESA_DMA_REQ; - else - creq->req.base.type = CESA_STD_REQ; - creq->src_nents = sg_nents_for_len(req->src, req->nbytes); if (creq->src_nents < 0) { dev_err(cesa_dev->dev, "Invalid number of src SG"); @@ -680,7 +674,7 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) if (*cached) return 0; - if (creq->req.base.type == CESA_DMA_REQ) + if (cesa_dev->caps->has_tdma) ret = mv_cesa_ahash_dma_req_init(req); return ret; @@ -700,7 +694,7 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); @@ -725,7 +719,7 @@ static int mv_cesa_ahash_final(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); @@ -750,7 +744,7 @@ static int mv_cesa_ahash_finup(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index dc07335386067..f9b8d2cfa1bce 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -37,9 +37,9 @@ bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, return true; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_step(struct mv_cesa_req *dreq) { - struct mv_cesa_engine *engine = dreq->base.engine; + struct mv_cesa_engine *engine = dreq->engine; writel_relaxed(0, engine->regs + CESA_SA_CFG); @@ -58,7 +58,7 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) { struct mv_cesa_tdma_desc *tdma; @@ -82,7 +82,7 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) dreq->chain.last = NULL; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine) { struct mv_cesa_tdma_desc *tdma; From 1bf6682cb31dff64c11564fe0c12ddd2cda23626 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:36 +0200 Subject: [PATCH 066/180] crypto: marvell - Add a complete operation for async requests So far, the 'process' operation was used to check if the current request was correctly handled by the engine, if it was the case it copied information from the SRAM to the main memory. Now, we split this operation. We keep the 'process' operation, which still checks if the request was correctly handled by the engine or not, then we add a new operation for completion. The 'complete' method copies the content of the SRAM to memory. This will soon become useful if we want to call the process and the complete operations from different locations depending on the type of the request (different cleanup logic). Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 1 + drivers/crypto/marvell/cesa.h | 3 +++ drivers/crypto/marvell/cipher.c | 28 +++++++++++++++++++++++----- drivers/crypto/marvell/hash.c | 22 ++++++++++++---------- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index f407bacdb0218..40d4add199476 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -98,6 +98,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) engine->req = NULL; mv_cesa_dequeue_req_unlocked(engine); spin_unlock_bh(&engine->lock); + ctx->ops->complete(req); ctx->ops->cleanup(req); local_bh_disable(); req->complete(req, res); diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index e67e3f17eb014..d7493351f14b4 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -456,6 +456,8 @@ struct mv_cesa_engine { * code) * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) + * @complete: complete the request, i.e copy result or context from sram when + * needed. */ struct mv_cesa_req_ops { void (*prepare)(struct crypto_async_request *req, @@ -463,6 +465,7 @@ struct mv_cesa_req_ops { int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); + void (*complete)(struct crypto_async_request *req); }; /** diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 7699528957f48..f994469feeb9e 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -118,7 +118,6 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; struct mv_cesa_engine *engine = creq->base.engine; size_t len; - unsigned int ivsize; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -128,10 +127,6 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, if (sreq->offset < req->nbytes) return -EINPROGRESS; - ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); - memcpy_fromio(req->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, ivsize); - return 0; } @@ -211,11 +206,34 @@ mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req) mv_cesa_ablkcipher_cleanup(ablkreq); } +static void +mv_cesa_ablkcipher_complete(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int ivsize; + + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { + struct mv_cesa_req *basereq; + + basereq = &creq->base; + memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + } else { + memcpy_fromio(ablkreq->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + } +} + static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, + .complete = mv_cesa_ablkcipher_complete, }; static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 03aaa39d6fd60..faa677a3e8813 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -287,17 +287,20 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->base.engine; - unsigned int digsize; - int ret, i; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->base, status); - else - ret = mv_cesa_ahash_std_process(ahashreq, status); + return mv_cesa_dma_process(&creq->base, status); - if (ret == -EINPROGRESS) - return ret; + return mv_cesa_ahash_std_process(ahashreq, status); +} + +static void mv_cesa_ahash_complete(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int digsize; + int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); for (i = 0; i < digsize / 4; i++) @@ -326,8 +329,6 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) result[i] = cpu_to_be32(creq->state[i]); } } - - return ret; } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -366,6 +367,7 @@ static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .process = mv_cesa_ahash_process, .prepare = mv_cesa_ahash_prepare, .cleanup = mv_cesa_ahash_req_cleanup, + .complete = mv_cesa_ahash_complete, }; static int mv_cesa_ahash_init(struct ahash_request *req, From 2786cee8e50bb4b4303dc22665f391b72318fa84 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:37 +0200 Subject: [PATCH 067/180] crypto: marvell - Move SRAM I/O operations to step functions Currently the crypto requests were sent to engines sequentially. This commit moves the SRAM I/O operations from the prepare to the step functions. It provides flexibility for future works and allow to prepare a request while the engine is running. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cipher.c | 6 +++--- drivers/crypto/marvell/hash.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index f994469feeb9e..1e9c722b1bb0d 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -89,6 +89,9 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) size_t len = min_t(size_t, req->nbytes - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); + mv_cesa_adjust_op(engine, &sreq->op); + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + len = sg_pcopy_to_buffer(req->src, creq->src_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, len, sreq->offset); @@ -177,12 +180,9 @@ mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; - struct mv_cesa_engine *engine = creq->base.engine; sreq->size = 0; sreq->offset = 0; - mv_cesa_adjust_op(engine, &sreq->op); - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); } static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index faa677a3e8813..4146757d34eb0 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -162,6 +162,15 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) unsigned int new_cache_ptr = 0; u32 frag_mode; size_t len; + unsigned int digsize; + int i; + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + for (i = 0; i < digsize / 4; i++) + writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -265,11 +274,8 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = creq->base.engine; sreq->offset = 0; - mv_cesa_adjust_op(engine, &creq->op_tmpl); - memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); } static void mv_cesa_ahash_step(struct crypto_async_request *req) @@ -336,8 +342,6 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - unsigned int digsize; - int i; creq->base.engine = engine; @@ -345,10 +349,6 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); - - digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); } static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) From bf8f91e711926c1fb57629338e30164ecfba9700 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:38 +0200 Subject: [PATCH 068/180] crypto: marvell - Add load balancing between engines This commits adds support for fine grained load balancing on multi-engine IPs. The engine is pre-selected based on its current load and on the weight of the crypto request that is about to be processed. The global crypto queue is also moved to each engine. These changes are required to allow chaining crypto requests at the DMA level. By using a crypto queue per engine, we make sure that we keep the state of the tdma chain synchronized with the crypto queue. We also reduce contention on 'cesa_dev->lock' and improve parallelism. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 34 +++++++++----------- drivers/crypto/marvell/cesa.h | 29 ++++++++++++++--- drivers/crypto/marvell/cipher.c | 57 +++++++++++++++------------------ drivers/crypto/marvell/hash.c | 50 +++++++++++------------------ 4 files changed, 84 insertions(+), 86 deletions(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 40d4add199476..26624f176c397 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -40,16 +40,14 @@ MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if over struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) +static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine) { struct crypto_async_request *req, *backlog; struct mv_cesa_ctx *ctx; - spin_lock_bh(&cesa_dev->lock); - backlog = crypto_get_backlog(&cesa_dev->queue); - req = crypto_dequeue_request(&cesa_dev->queue); + backlog = crypto_get_backlog(&engine->queue); + req = crypto_dequeue_request(&engine->queue); engine->req = req; - spin_unlock_bh(&cesa_dev->lock); if (!req) return; @@ -58,7 +56,6 @@ static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) backlog->complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(req->tfm); - ctx->ops->prepare(req, engine); ctx->ops->step(req); } @@ -96,7 +93,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) if (res != -EINPROGRESS) { spin_lock_bh(&engine->lock); engine->req = NULL; - mv_cesa_dequeue_req_unlocked(engine); + mv_cesa_dequeue_req_locked(engine); spin_unlock_bh(&engine->lock); ctx->ops->complete(req); ctx->ops->cleanup(req); @@ -116,21 +113,19 @@ int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_req *creq) { int ret; - int i; + struct mv_cesa_engine *engine = creq->engine; - spin_lock_bh(&cesa_dev->lock); - ret = crypto_enqueue_request(&cesa_dev->queue, req); - spin_unlock_bh(&cesa_dev->lock); + spin_lock_bh(&engine->lock); + ret = crypto_enqueue_request(&engine->queue, req); + spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - for (i = 0; i < cesa_dev->caps->nengines; i++) { - spin_lock_bh(&cesa_dev->engines[i].lock); - if (!cesa_dev->engines[i].req) - mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]); - spin_unlock_bh(&cesa_dev->engines[i].lock); - } + spin_lock_bh(&engine->lock); + if (!engine->req) + mv_cesa_dequeue_req_locked(engine); + spin_unlock_bh(&engine->lock); return -EINPROGRESS; } @@ -425,7 +420,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) @@ -498,6 +493,9 @@ static int mv_cesa_probe(struct platform_device *pdev) engine); if (ret) goto err_cleanup; + + crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + atomic_set(&engine->load, 0); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index d7493351f14b4..8d600682ca470 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -400,7 +400,6 @@ struct mv_cesa_dev_dma { * @regs: device registers * @sram_size: usable SRAM size * @lock: device lock - * @queue: crypto request queue * @engines: array of engines * @dma: dma pools * @@ -412,7 +411,6 @@ struct mv_cesa_dev { struct device *dev; unsigned int sram_size; spinlock_t lock; - struct crypto_queue queue; struct mv_cesa_engine *engines; struct mv_cesa_dev_dma *dma; }; @@ -431,6 +429,8 @@ struct mv_cesa_dev { * @int_mask: interrupt mask cache * @pool: memory pool pointing to the memory region reserved in * SRAM + * @queue: fifo of the pending crypto requests + * @load: engine load counter, useful for load balancing * * Structure storing CESA engine information. */ @@ -446,11 +446,12 @@ struct mv_cesa_engine { size_t max_req_len; u32 int_mask; struct gen_pool *pool; + struct crypto_queue queue; + atomic_t load; }; /** * struct mv_cesa_req_ops - CESA request operations - * @prepare: prepare a request to be executed on the specified engine * @process: process a request chunk result (should return 0 if the * operation, -EINPROGRESS if it needs more steps or an error * code) @@ -460,8 +461,6 @@ struct mv_cesa_engine { * needed. */ struct mv_cesa_req_ops { - void (*prepare)(struct crypto_async_request *req, - struct mv_cesa_engine *engine); int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); @@ -690,6 +689,26 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_req *creq); +static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) +{ + int i; + u32 min_load = U32_MAX; + struct mv_cesa_engine *selected = NULL; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + struct mv_cesa_engine *engine = cesa_dev->engines + i; + u32 load = atomic_read(&engine->load); + if (load < min_load) { + min_load = load; + selected = engine; + } + } + + atomic_add(weight, &selected->load); + + return selected; +} + /* * Helper function that indicates whether a crypto request needs to be * cleaned up or not after being enqueued using mv_cesa_queue_req(). diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 1e9c722b1bb0d..3eb607cda2691 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -214,6 +214,7 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req) struct mv_cesa_engine *engine = creq->base.engine; unsigned int ivsize; + atomic_sub(ablkreq->nbytes, &engine->load); ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { @@ -231,7 +232,6 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req) static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, - .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, .complete = mv_cesa_ablkcipher_complete, }; @@ -456,29 +456,41 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, return ret; } -static int mv_cesa_des_op(struct ablkcipher_request *req, - struct mv_cesa_op_ctx *tmpl) +static int mv_cesa_ablkcipher_queue_req(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; - - mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, - CESA_SA_DESC_CFG_CRYPTM_MSK); - - memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_engine *engine; ret = mv_cesa_ablkcipher_req_init(req, tmpl); if (ret) return ret; + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ablkcipher_prepare(&req->base, engine); + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); return ret; } +static int mv_cesa_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + + return mv_cesa_ablkcipher_queue_req(req, tmpl); +} + static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req) { struct mv_cesa_op_ctx tmpl; @@ -580,24 +592,14 @@ struct crypto_alg mv_cesa_cbc_des_alg = { static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret; mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, CESA_SA_DESC_CFG_CRYPTM_MSK); memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req) @@ -707,9 +709,8 @@ struct crypto_alg mv_cesa_cbc_des3_ede_alg = { static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret, i; + int i; u32 *key; u32 cfg; @@ -732,15 +733,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, CESA_SA_DESC_CFG_CRYPTM_MSK | CESA_SA_DESC_CFG_AES_LEN_MSK); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 4146757d34eb0..638b2b7eae997 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -335,6 +335,8 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) result[i] = cpu_to_be32(creq->state[i]); } } + + atomic_sub(ahashreq->nbytes, &engine->load); } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -365,7 +367,6 @@ static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .step = mv_cesa_ahash_step, .process = mv_cesa_ahash_process, - .prepare = mv_cesa_ahash_prepare, .cleanup = mv_cesa_ahash_req_cleanup, .complete = mv_cesa_ahash_complete, }; @@ -682,13 +683,13 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) return ret; } -static int mv_cesa_ahash_update(struct ahash_request *req) +static int mv_cesa_ahash_queue_req(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine; bool cached = false; int ret; - creq->len += req->nbytes; ret = mv_cesa_ahash_req_init(req, &cached); if (ret) return ret; @@ -696,61 +697,48 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ahash_prepare(&req->base, engine); + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); return ret; } +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->len += req->nbytes; + + return mv_cesa_ahash_queue_req(req); +} + static int mv_cesa_ahash_final(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; req->nbytes = 0; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_finup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; creq->len += req->nbytes; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, From 85030c5168f1df03a164d47254cc785331f1dfe2 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:39 +0200 Subject: [PATCH 069/180] crypto: marvell - Add support for chaining crypto requests in TDMA mode The Cryptographic Engines and Security Accelerators (CESA) supports the Multi-Packet Chain Mode. With this mode enabled, multiple tdma requests can be chained and processed by the hardware without software intervention. This mode was already activated, however the crypto requests were not chained together. By doing so, we reduce significantly the number of IRQs. Instead of being interrupted at the end of each crypto request, we are interrupted at the end of the last cryptographic request processed by the engine. This commits re-factorizes the code, changes the code architecture and adds the required data structures to chain cryptographic requests together before sending them to an engine (stopped or possibly already running). Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 115 +++++++++++++++++++++++++------- drivers/crypto/marvell/cesa.h | 39 ++++++++++- drivers/crypto/marvell/cipher.c | 2 +- drivers/crypto/marvell/hash.c | 6 ++ drivers/crypto/marvell/tdma.c | 86 ++++++++++++++++++++++++ 5 files changed, 221 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 26624f176c397..218c497f2ddbd 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -40,14 +40,33 @@ MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if over struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine) +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog) { - struct crypto_async_request *req, *backlog; - struct mv_cesa_ctx *ctx; + struct crypto_async_request *req; - backlog = crypto_get_backlog(&engine->queue); + *backlog = crypto_get_backlog(&engine->queue); req = crypto_dequeue_request(&engine->queue); - engine->req = req; + + if (!req) + return NULL; + + return req; +} + +static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req = NULL, *backlog = NULL; + struct mv_cesa_ctx *ctx; + + + spin_lock_bh(&engine->lock); + if (!engine->req) { + req = mv_cesa_dequeue_req_locked(engine, &backlog); + engine->req = req; + } + spin_unlock_bh(&engine->lock); if (!req) return; @@ -57,6 +76,46 @@ static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine) ctx = crypto_tfm_ctx(req->tfm); ctx->ops->step(req); + + return; +} + +static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + int res; + + req = engine->req; + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status); + + if (res == 0) { + ctx->ops->complete(req); + mv_cesa_engine_enqueue_complete_request(engine, req); + } else if (res == -EINPROGRESS) { + ctx->ops->step(req); + } + + return res; +} + +static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status) +{ + if (engine->chain.first && engine->chain.last) + return mv_cesa_tdma_process(engine, status); + + return mv_cesa_std_process(engine, status); +} + +static inline void +mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct crypto_async_request *req, + int res) +{ + ctx->ops->cleanup(req); + local_bh_disable(); + req->complete(req, res); + local_bh_enable(); } static irqreturn_t mv_cesa_int(int irq, void *priv) @@ -83,26 +142,31 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); writel(~status, engine->regs + CESA_SA_INT_STATUS); + /* Process fetched requests */ + res = mv_cesa_int_process(engine, status & mask); ret = IRQ_HANDLED; + spin_lock_bh(&engine->lock); req = engine->req; + if (res != -EINPROGRESS) + engine->req = NULL; spin_unlock_bh(&engine->lock); - if (req) { - ctx = crypto_tfm_ctx(req->tfm); - res = ctx->ops->process(req, status & mask); - if (res != -EINPROGRESS) { - spin_lock_bh(&engine->lock); - engine->req = NULL; - mv_cesa_dequeue_req_locked(engine); - spin_unlock_bh(&engine->lock); - ctx->ops->complete(req); - ctx->ops->cleanup(req); - local_bh_disable(); - req->complete(req, res); - local_bh_enable(); - } else { - ctx->ops->step(req); - } + + ctx = crypto_tfm_ctx(req->tfm); + + if (res && res != -EINPROGRESS) + mv_cesa_complete_req(ctx, req, res); + + /* Launch the next pending request */ + mv_cesa_rearm_engine(engine); + + /* Iterate over the complete queue */ + while (true) { + req = mv_cesa_engine_dequeue_complete_request(engine); + if (!req) + break; + + mv_cesa_complete_req(ctx, req, 0); } } @@ -116,16 +180,16 @@ int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_engine *engine = creq->engine; spin_lock_bh(&engine->lock); + if (mv_cesa_req_get_type(creq) == CESA_DMA_REQ) + mv_cesa_tdma_chain(engine, creq); + ret = crypto_enqueue_request(&engine->queue, req); spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - spin_lock_bh(&engine->lock); - if (!engine->req) - mv_cesa_dequeue_req_locked(engine); - spin_unlock_bh(&engine->lock); + mv_cesa_rearm_engine(engine); return -EINPROGRESS; } @@ -496,6 +560,7 @@ static int mv_cesa_probe(struct platform_device *pdev) crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); atomic_set(&engine->load, 0); + INIT_LIST_HEAD(&engine->complete_queue); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 8d600682ca470..e423d33decd44 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -271,7 +271,9 @@ struct mv_cesa_op_ctx { /* TDMA descriptor flags */ #define CESA_TDMA_DST_IN_SRAM BIT(31) #define CESA_TDMA_SRC_IN_SRAM BIT(30) -#define CESA_TDMA_TYPE_MSK GENMASK(29, 0) +#define CESA_TDMA_END_OF_REQ BIT(29) +#define CESA_TDMA_BREAK_CHAIN BIT(28) +#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 @@ -431,6 +433,9 @@ struct mv_cesa_dev { * SRAM * @queue: fifo of the pending crypto requests * @load: engine load counter, useful for load balancing + * @chain: list of the current tdma descriptors being processed + * by this engine. + * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. */ @@ -448,6 +453,8 @@ struct mv_cesa_engine { struct gen_pool *pool; struct crypto_queue queue; atomic_t load; + struct mv_cesa_tdma_chain chain; + struct list_head complete_queue; }; /** @@ -608,6 +615,29 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; + +static inline void +mv_cesa_engine_enqueue_complete_request(struct mv_cesa_engine *engine, + struct crypto_async_request *req) +{ + list_add_tail(&req->list, &engine->complete_queue); +} + +static inline struct crypto_async_request * +mv_cesa_engine_dequeue_complete_request(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req; + + req = list_first_entry_or_null(&engine->complete_queue, + struct crypto_async_request, + list); + if (req) + list_del(&req->list); + + return req; +} + + static inline enum mv_cesa_req_type mv_cesa_req_get_type(struct mv_cesa_req *req) { @@ -689,6 +719,10 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_req *creq); +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog); + static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) { int i; @@ -794,6 +828,9 @@ static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq); +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status); static inline void diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 3eb607cda2691..48df03a06066e 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -390,6 +390,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, goto err_free_tdma; basereq->chain = chain; + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; return 0; @@ -447,7 +448,6 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, CESA_SA_DESC_CFG_OP_MSK); - /* TODO: add a threshold for DMA usage */ if (cesa_dev->caps->has_tdma) ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl); else diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 638b2b7eae997..c35912b4fffb6 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -172,6 +172,9 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) for (i = 0; i < digsize / 4; i++) writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, creq->cache, creq->cache_ptr); @@ -647,6 +650,9 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; + basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | + CESA_TDMA_BREAK_CHAIN); + return 0; err_free_tdma: diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index f9b8d2cfa1bce..b0f9f5050dc8f 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -99,6 +99,92 @@ void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, } } +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq) +{ + if (engine->chain.first == NULL && engine->chain.last == NULL) { + engine->chain.first = dreq->chain.first; + engine->chain.last = dreq->chain.last; + } else { + struct mv_cesa_tdma_desc *last; + + last = engine->chain.last; + last->next = dreq->chain.first; + engine->chain.last = dreq->chain.last; + + if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + last->next_dma = dreq->chain.first->cur_dma; + } +} + +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req = NULL; + struct mv_cesa_tdma_desc *tdma = NULL, *next = NULL; + dma_addr_t tdma_cur; + int res = 0; + + tdma_cur = readl(engine->regs + CESA_TDMA_CUR); + + for (tdma = engine->chain.first; tdma; tdma = next) { + spin_lock_bh(&engine->lock); + next = tdma->next; + spin_unlock_bh(&engine->lock); + + if (tdma->flags & CESA_TDMA_END_OF_REQ) { + struct crypto_async_request *backlog = NULL; + struct mv_cesa_ctx *ctx; + u32 current_status; + + spin_lock_bh(&engine->lock); + /* + * if req is NULL, this means we're processing the + * request in engine->req. + */ + if (!req) + req = engine->req; + else + req = mv_cesa_dequeue_req_locked(engine, + &backlog); + + /* Re-chaining to the next request */ + engine->chain.first = tdma->next; + tdma->next = NULL; + + /* If this is the last request, clear the chain */ + if (engine->chain.first == NULL) + engine->chain.last = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + current_status = (tdma->cur_dma == tdma_cur) ? + status : CESA_SA_INT_ACC0_IDMA_DONE; + res = ctx->ops->process(req, current_status); + ctx->ops->complete(req); + + if (res == 0) + mv_cesa_engine_enqueue_complete_request(engine, + req); + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + } + + if (res || tdma->cur_dma == tdma_cur) + break; + } + + /* Save the last request in error to engine->req, so that the core + * knows which request was fautly */ + if (res) { + spin_lock_bh(&engine->lock); + engine->req = req; + spin_unlock_bh(&engine->lock); + } + + return res; +} + static struct mv_cesa_tdma_desc * mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) { From 47a1f0b2d1d11350ffecbfebc4105116bf6c133e Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:40 +0200 Subject: [PATCH 070/180] crypto: marvell - Increase the size of the crypto queue Now that crypto requests are chained together at the DMA level, we increase the size of the crypto queue for each engine. The result is that as the backlog list is reached later, it does not stop the crypto stack from sending asychronous requests, so more cryptographic tasks are processed by the engines. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 218c497f2ddbd..e373cc6557c6a 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -32,7 +32,7 @@ #include "cesa.h" /* Limit of the crypto queue before reaching the backlog */ -#define CESA_CRYPTO_DEFAULT_MAX_QLEN 50 +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 128 static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); From 81760ea6a95ad4c41273a71052f61b9f087b5753 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Jun 2016 16:55:13 +0800 Subject: [PATCH 071/180] crypto: cryptd - Add helpers to check whether a tfm is queued This patch adds helpers to check whether a given tfm is currently queued. This is meant to be used by ablk_helper and similar entities to ensure that no reordering is introduced because of requests queued in cryptd with respect to requests being processed in softirq context. The per-cpu queue length limit is also increased to 1000 in line with network limits. Signed-off-by: Herbert Xu --- crypto/cryptd.c | 132 ++++++++++++++++++++++++++++++++++------ include/crypto/cryptd.h | 5 ++ 2 files changed, 118 insertions(+), 19 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 7921251cdb13d..cf8037a87b2d1 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,7 @@ #include #include -#define CRYPTD_MAX_CPU_QLEN 100 +#define CRYPTD_MAX_CPU_QLEN 1000 struct cryptd_cpu_queue { struct crypto_queue queue; @@ -58,6 +59,7 @@ struct aead_instance_ctx { }; struct cryptd_blkcipher_ctx { + atomic_t refcnt; struct crypto_blkcipher *child; }; @@ -66,6 +68,7 @@ struct cryptd_blkcipher_request_ctx { }; struct cryptd_hash_ctx { + atomic_t refcnt; struct crypto_shash *child; }; @@ -75,6 +78,7 @@ struct cryptd_hash_request_ctx { }; struct cryptd_aead_ctx { + atomic_t refcnt; struct crypto_aead *child; }; @@ -118,11 +122,29 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, { int cpu, err; struct cryptd_cpu_queue *cpu_queue; + struct crypto_tfm *tfm; + atomic_t *refcnt; + bool may_backlog; cpu = get_cpu(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); + + refcnt = crypto_tfm_ctx(request->tfm); + may_backlog = request->flags & CRYPTO_TFM_REQ_MAY_BACKLOG; + + if (err == -EBUSY && !may_backlog) + goto out_put_cpu; + queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); + + if (!atomic_read(refcnt)) + goto out_put_cpu; + + tfm = request->tfm; + atomic_inc(refcnt); + +out_put_cpu: put_cpu(); return err; @@ -206,7 +228,10 @@ static void cryptd_blkcipher_crypt(struct ablkcipher_request *req, unsigned int len)) { struct cryptd_blkcipher_request_ctx *rctx; + struct cryptd_blkcipher_ctx *ctx; + struct crypto_ablkcipher *tfm; struct blkcipher_desc desc; + int refcnt; rctx = ablkcipher_request_ctx(req); @@ -222,9 +247,16 @@ static void cryptd_blkcipher_crypt(struct ablkcipher_request *req, req->base.complete = rctx->complete; out: + tfm = crypto_ablkcipher_reqtfm(req); + ctx = crypto_ablkcipher_ctx(tfm); + refcnt = atomic_read(&ctx->refcnt); + local_bh_disable(); rctx->complete(&req->base, err); local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ablkcipher(tfm); } static void cryptd_blkcipher_encrypt(struct crypto_async_request *req, int err) @@ -456,6 +488,21 @@ static int cryptd_hash_enqueue(struct ahash_request *req, return cryptd_enqueue_request(queue, &req->base); } +static void cryptd_hash_complete(struct ahash_request *req, int err) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + int refcnt = atomic_read(&ctx->refcnt); + + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ahash(tfm); +} + static void cryptd_hash_init(struct crypto_async_request *req_async, int err) { struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); @@ -475,9 +522,7 @@ static void cryptd_hash_init(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_init_enqueue(struct ahash_request *req) @@ -500,9 +545,7 @@ static void cryptd_hash_update(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_update_enqueue(struct ahash_request *req) @@ -523,9 +566,7 @@ static void cryptd_hash_final(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_final_enqueue(struct ahash_request *req) @@ -546,9 +587,7 @@ static void cryptd_hash_finup(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_finup_enqueue(struct ahash_request *req) @@ -575,9 +614,7 @@ static void cryptd_hash_digest(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_digest_enqueue(struct ahash_request *req) @@ -688,7 +725,10 @@ static void cryptd_aead_crypt(struct aead_request *req, int (*crypt)(struct aead_request *req)) { struct cryptd_aead_request_ctx *rctx; + struct cryptd_aead_ctx *ctx; crypto_completion_t compl; + struct crypto_aead *tfm; + int refcnt; rctx = aead_request_ctx(req); compl = rctx->complete; @@ -697,10 +737,18 @@ static void cryptd_aead_crypt(struct aead_request *req, goto out; aead_request_set_tfm(req, child); err = crypt( req ); + out: + tfm = crypto_aead_reqtfm(req); + ctx = crypto_aead_ctx(tfm); + refcnt = atomic_read(&ctx->refcnt); + local_bh_disable(); compl(&req->base, err); local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_aead(tfm); } static void cryptd_aead_encrypt(struct crypto_async_request *areq, int err) @@ -883,6 +931,7 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_blkcipher_ctx *ctx; struct crypto_tfm *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, @@ -899,6 +948,9 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, return ERR_PTR(-EINVAL); } + ctx = crypto_tfm_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm)); } EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher); @@ -910,9 +962,20 @@ struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm) } EXPORT_SYMBOL_GPL(cryptd_ablkcipher_child); +bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm) +{ + struct cryptd_blkcipher_ctx *ctx = crypto_ablkcipher_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_ablkcipher_queued); + void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm) { - crypto_free_ablkcipher(&tfm->base); + struct cryptd_blkcipher_ctx *ctx = crypto_ablkcipher_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ablkcipher(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher); @@ -920,6 +983,7 @@ struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_hash_ctx *ctx; struct crypto_ahash *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, @@ -933,6 +997,9 @@ struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, return ERR_PTR(-EINVAL); } + ctx = crypto_ahash_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + return __cryptd_ahash_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_ahash); @@ -952,9 +1019,20 @@ struct shash_desc *cryptd_shash_desc(struct ahash_request *req) } EXPORT_SYMBOL_GPL(cryptd_shash_desc); +bool cryptd_ahash_queued(struct cryptd_ahash *tfm) +{ + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_ahash_queued); + void cryptd_free_ahash(struct cryptd_ahash *tfm) { - crypto_free_ahash(&tfm->base); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ahash(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_ahash); @@ -962,6 +1040,7 @@ struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_aead_ctx *ctx; struct crypto_aead *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, @@ -974,6 +1053,10 @@ struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, crypto_free_aead(tfm); return ERR_PTR(-EINVAL); } + + ctx = crypto_aead_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + return __cryptd_aead_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_aead); @@ -986,9 +1069,20 @@ struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm) } EXPORT_SYMBOL_GPL(cryptd_aead_child); +bool cryptd_aead_queued(struct cryptd_aead *tfm) +{ + struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_aead_queued); + void cryptd_free_aead(struct cryptd_aead *tfm) { - crypto_free_aead(&tfm->base); + struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_aead(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_aead); diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 1547f540c920b..bc792d5a9e88a 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -31,6 +31,7 @@ static inline struct cryptd_ablkcipher *__cryptd_ablkcipher_cast( struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, u32 type, u32 mask); struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm); +bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm); void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm); struct cryptd_ahash { @@ -48,6 +49,8 @@ struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask); struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); struct shash_desc *cryptd_shash_desc(struct ahash_request *req); +/* Must be called without moving CPUs. */ +bool cryptd_ahash_queued(struct cryptd_ahash *tfm); void cryptd_free_ahash(struct cryptd_ahash *tfm); struct cryptd_aead { @@ -64,6 +67,8 @@ struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, u32 type, u32 mask); struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm); +/* Must be called without moving CPUs. */ +bool cryptd_aead_queued(struct cryptd_aead *tfm); void cryptd_free_aead(struct cryptd_aead *tfm); From 38b2f68b426429c06cdf2ae5c8a89371524db203 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Jun 2016 16:55:14 +0800 Subject: [PATCH 072/180] crypto: aesni - Fix cryptd reordering problem on gcm This patch fixes an old bug where gcm requests can be reordered because some are processed by cryptd while others are processed directly in softirq context. The fix is to always postpone to cryptd if there are currently requests outstanding from the same tfm. Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 5b7fa14710073..9e15572ef06d6 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1098,9 +1098,12 @@ static int rfc4106_encrypt(struct aead_request *req) struct cryptd_aead **ctx = crypto_aead_ctx(tfm); struct cryptd_aead *cryptd_tfm = *ctx; - aead_request_set_tfm(req, irq_fpu_usable() ? - cryptd_aead_child(cryptd_tfm) : - &cryptd_tfm->base); + tfm = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + tfm = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, tfm); return crypto_aead_encrypt(req); } @@ -1111,9 +1114,12 @@ static int rfc4106_decrypt(struct aead_request *req) struct cryptd_aead **ctx = crypto_aead_ctx(tfm); struct cryptd_aead *cryptd_tfm = *ctx; - aead_request_set_tfm(req, irq_fpu_usable() ? - cryptd_aead_child(cryptd_tfm) : - &cryptd_tfm->base); + tfm = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + tfm = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, tfm); return crypto_aead_decrypt(req); } From 88407a39b5169889fe95a9355891b98437aa3e50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Jun 2016 16:55:15 +0800 Subject: [PATCH 073/180] crypto: ablk_helper - Fix cryptd reordering This patch fixes an old bug where requests can be reordered because some are processed by cryptd while others are processed directly in softirq context. The fix is to always postpone to cryptd if there are currently requests outstanding from the same tfm. Signed-off-by: Herbert Xu --- crypto/ablk_helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c index e1fcf53bb931f..1441f07d0a199 100644 --- a/crypto/ablk_helper.c +++ b/crypto/ablk_helper.c @@ -71,7 +71,8 @@ int ablk_encrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (!may_use_simd()) { + if (!may_use_simd() || + (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); @@ -90,7 +91,8 @@ int ablk_decrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (!may_use_simd()) { + if (!may_use_simd() || + (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); From 7271b33cb87e80f3a416fb031ad3ca87f0bea80a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Jun 2016 16:55:16 +0800 Subject: [PATCH 074/180] crypto: ghash-clmulni - Fix cryptd reordering This patch fixes an old bug where requests can be reordered because some are processed by cryptd while others are processed directly in softirq context. The fix is to always postpone to cryptd if there are currently requests outstanding from the same tfm. This patch also removes the redundant use of cryptd in the async init function as init never touches the FPU. Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_glue.c | 40 +++++++++------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index a69321a777839..0420bab19efbb 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -168,30 +168,23 @@ static int ghash_async_init(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - if (!irq_fpu_usable()) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_init(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - - desc->tfm = child; - desc->flags = req->base.flags; - return crypto_shash_init(desc); - } + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); } static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!irq_fpu_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_update(cryptd_req); @@ -204,12 +197,12 @@ static int ghash_async_update(struct ahash_request *req) static int ghash_async_final(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!irq_fpu_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_final(cryptd_req); @@ -249,7 +242,8 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable()) { + if (!irq_fpu_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_digest(cryptd_req); From 820573ebd60d85afb8bb07fa3547ebbf842c59d4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Jun 2016 16:55:17 +0800 Subject: [PATCH 075/180] crypto: ghash-ce - Fix cryptd reordering This patch fixes an old bug where requests can be reordered because some are processed by cryptd while others are processed directly in softirq context. The fix is to always postpone to cryptd if there are currently requests outstanding from the same tfm. This patch also removes the redundant use of cryptd in the async init function as init never touches the FPU. Signed-off-by: Herbert Xu --- arch/arm/crypto/ghash-ce-glue.c | 40 ++++++++++++++------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 03a39fe292464..1568cb5cd8705 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -154,30 +154,23 @@ static int ghash_async_init(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - if (!may_use_simd()) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_init(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - - desc->tfm = child; - desc->flags = req->base.flags; - return crypto_shash_init(desc); - } + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); } static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!may_use_simd()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!may_use_simd() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_update(cryptd_req); @@ -190,12 +183,12 @@ static int ghash_async_update(struct ahash_request *req) static int ghash_async_final(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!may_use_simd()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!may_use_simd() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_final(cryptd_req); @@ -212,7 +205,8 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!may_use_simd()) { + if (!may_use_simd() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_digest(cryptd_req); From 331bf739c4f9992a73547d20bd8f2378b97d386a Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Tue, 21 Jun 2016 18:21:46 -0700 Subject: [PATCH 076/180] crypto: sha1-mb - async implementation for sha1-mb Herbert wants the sha1-mb algorithm to have an async implementation: https://lkml.org/lkml/2016/4/5/286. Currently, sha1-mb uses an async interface for the outer algorithm and a sync interface for the inner algorithm. This patch introduces a async interface for even the inner algorithm. Signed-off-by: Megha Dey Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha-mb/sha1_mb.c | 182 +++++++++++++++++-------------- crypto/mcryptd.c | 132 ++++++++++------------ include/crypto/internal/hash.h | 12 +- include/crypto/mcryptd.h | 8 +- 4 files changed, 165 insertions(+), 169 deletions(-) diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 0a464919542ca..669cc37268e1c 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -80,10 +80,10 @@ struct sha1_mb_ctx { static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) { - struct shash_desc *desc; + struct ahash_request *areq; - desc = container_of((void *) hash_ctx, struct shash_desc, __ctx); - return container_of(desc, struct mcryptd_hash_request_ctx, desc); + areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); + return container_of(areq, struct mcryptd_hash_request_ctx, areq); } static inline struct ahash_request @@ -93,7 +93,7 @@ static inline struct ahash_request } static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, - struct shash_desc *desc) + struct ahash_request *areq) { rctx->flag = HASH_UPDATE; } @@ -375,9 +375,9 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) } } -static int sha1_mb_init(struct shash_desc *desc) +static int sha1_mb_init(struct ahash_request *areq) { - struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); hash_ctx_init(sctx); sctx->job.result_digest[0] = SHA1_H0; @@ -395,7 +395,7 @@ static int sha1_mb_init(struct shash_desc *desc) static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) { int i; - struct sha1_hash_ctx *sctx = shash_desc_ctx(&rctx->desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); __be32 *dst = (__be32 *) rctx->out; for (i = 0; i < 5; ++i) @@ -427,7 +427,7 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, } sha_ctx = (struct sha1_hash_ctx *) - shash_desc_ctx(&rctx->desc); + ahash_request_ctx(&rctx->areq); kernel_fpu_begin(); sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); @@ -519,11 +519,10 @@ static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, mcryptd_arm_flusher(cstate, delay); } -static int sha1_mb_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha1_mb_update(struct ahash_request *areq) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(areq, struct mcryptd_hash_request_ctx, areq); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -539,7 +538,7 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, } /* need to init context */ - req_ctx_init(rctx, desc); + req_ctx_init(rctx, areq); nbytes = crypto_ahash_walk_first(req, &rctx->walk); @@ -552,7 +551,7 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, rctx->flag |= HASH_DONE; /* submit */ - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, @@ -579,11 +578,10 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, return ret; } -static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha1_mb_finup(struct ahash_request *areq) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(areq, struct mcryptd_hash_request_ctx, areq); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -598,7 +596,7 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, } /* need to init context */ - req_ctx_init(rctx, desc); + req_ctx_init(rctx, areq); nbytes = crypto_ahash_walk_first(req, &rctx->walk); @@ -611,11 +609,10 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, rctx->flag |= HASH_DONE; flag = HASH_LAST; } - rctx->out = out; /* submit */ rctx->flag |= HASH_FINAL; - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); @@ -641,10 +638,10 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, return ret; } -static int sha1_mb_final(struct shash_desc *desc, u8 *out) +static int sha1_mb_final(struct ahash_request *areq) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(areq, struct mcryptd_hash_request_ctx, areq); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -659,12 +656,11 @@ static int sha1_mb_final(struct shash_desc *desc, u8 *out) } /* need to init context */ - req_ctx_init(rctx, desc); + req_ctx_init(rctx, areq); - rctx->out = out; rctx->flag |= HASH_DONE | HASH_FINAL; - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); /* flag HASH_FINAL and 0 data size */ sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); @@ -691,48 +687,98 @@ static int sha1_mb_final(struct shash_desc *desc, u8 *out) return ret; } -static int sha1_mb_export(struct shash_desc *desc, void *out) +static int sha1_mb_export(struct ahash_request *areq, void *out) { - struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); memcpy(out, sctx, sizeof(*sctx)); return 0; } -static int sha1_mb_import(struct shash_desc *desc, const void *in) +static int sha1_mb_import(struct ahash_request *areq, const void *in) { - struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); memcpy(sctx, in, sizeof(*sctx)); return 0; } +static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; -static struct shash_alg sha1_mb_shash_alg = { - .digestsize = SHA1_DIGEST_SIZE, + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha1_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + sizeof(struct sha1_hash_ctx)); + + return 0; +} + +static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha1_mb_areq_alg = { .init = sha1_mb_init, .update = sha1_mb_update, .final = sha1_mb_final, .finup = sha1_mb_finup, .export = sha1_mb_export, .import = sha1_mb_import, - .descsize = sizeof(struct sha1_hash_ctx), - .statesize = sizeof(struct sha1_hash_ctx), - .base = { - .cra_name = "__sha1-mb", - .cra_driver_name = "__intel_sha1-mb", - .cra_priority = 100, - /* - * use ASYNC flag as some buffers in multi-buffer - * algo may not have completed before hashing thread sleep - */ - .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_hash_ctx), + .base = { + .cra_name = "__sha1-mb", + .cra_driver_name = "__intel_sha1-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread + * sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha1_mb_areq_alg.halg.base.cra_list), + .cra_init = sha1_mb_areq_init_tfm, + .cra_exit = sha1_mb_areq_exit_tfm, + .cra_ctxsize = sizeof(struct sha1_hash_ctx), + } } }; @@ -817,46 +863,20 @@ static int sha1_mb_async_import(struct ahash_request *req, const void *in) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - struct crypto_shash *child = mcryptd_ahash_child(mcryptd_tfm); + struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); struct mcryptd_hash_request_ctx *rctx; - struct shash_desc *desc; + struct ahash_request *areq; memcpy(mcryptd_req, req, sizeof(*req)); ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); rctx = ahash_request_ctx(mcryptd_req); - desc = &rctx->desc; - desc->tfm = child; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - return crypto_ahash_import(mcryptd_req, in); -} - -static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) -{ - struct mcryptd_ahash *mcryptd_tfm; - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); - struct mcryptd_hash_ctx *mctx; + areq = &rctx->areq; - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(mcryptd_tfm)) - return PTR_ERR(mcryptd_tfm); - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); - mctx->alg_state = &sha1_mb_alg_state; - ctx->mcryptd_tfm = mcryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&mcryptd_tfm->base)); - - return 0; -} - -static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + ahash_request_set_tfm(areq, child); + ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req); - mcryptd_free_ahash(ctx->mcryptd_tfm); + return crypto_ahash_import(mcryptd_req, in); } static struct ahash_alg sha1_mb_async_alg = { @@ -965,7 +985,7 @@ static int __init sha1_mb_mod_init(void) } sha1_mb_alg_state.flusher = &sha1_mb_flusher; - err = crypto_register_shash(&sha1_mb_shash_alg); + err = crypto_register_ahash(&sha1_mb_areq_alg); if (err) goto err2; err = crypto_register_ahash(&sha1_mb_async_alg); @@ -975,7 +995,7 @@ static int __init sha1_mb_mod_init(void) return 0; err1: - crypto_unregister_shash(&sha1_mb_shash_alg); + crypto_unregister_ahash(&sha1_mb_areq_alg); err2: for_each_possible_cpu(cpu) { cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); @@ -991,7 +1011,7 @@ static void __exit sha1_mb_mod_fini(void) struct mcryptd_alg_cstate *cpu_state; crypto_unregister_ahash(&sha1_mb_async_alg); - crypto_unregister_shash(&sha1_mb_shash_alg); + crypto_unregister_ahash(&sha1_mb_areq_alg); for_each_possible_cpu(cpu) { cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); kfree(cpu_state->mgr); diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index c4eb9da49d4f5..86fb59b109a99 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -41,7 +41,7 @@ struct mcryptd_flush_list { static struct mcryptd_flush_list __percpu *mcryptd_flist; struct hashd_instance_ctx { - struct crypto_shash_spawn spawn; + struct crypto_ahash_spawn spawn; struct mcryptd_queue *queue; }; @@ -272,18 +272,18 @@ static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) { struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); - struct crypto_shash_spawn *spawn = &ictx->spawn; + struct crypto_ahash_spawn *spawn = &ictx->spawn; struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_shash *hash; + struct crypto_ahash *hash; - hash = crypto_spawn_shash(spawn); + hash = crypto_spawn_ahash(spawn); if (IS_ERR(hash)) return PTR_ERR(hash); ctx->child = hash; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct mcryptd_hash_request_ctx) + - crypto_shash_descsize(hash)); + crypto_ahash_reqsize(hash)); return 0; } @@ -291,21 +291,21 @@ static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) { struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_shash(ctx->child); + crypto_free_ahash(ctx->child); } static int mcryptd_hash_setkey(struct crypto_ahash *parent, const u8 *key, unsigned int keylen) { struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); - struct crypto_shash *child = ctx->child; + struct crypto_ahash *child = ctx->child; int err; - crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) & + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) & CRYPTO_TFM_REQ_MASK); - err = crypto_shash_setkey(child, key, keylen); - crypto_ahash_set_flags(parent, crypto_shash_get_flags(child) & + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) & CRYPTO_TFM_RES_MASK); return err; } @@ -331,20 +331,20 @@ static int mcryptd_hash_enqueue(struct ahash_request *req, static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) { struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); - struct crypto_shash *child = ctx->child; + struct crypto_ahash *child = ctx->child; struct ahash_request *req = ahash_request_cast(req_async); struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - struct shash_desc *desc = &rctx->desc; + struct ahash_request *desc = &rctx->areq; if (unlikely(err == -EINPROGRESS)) goto out; - desc->tfm = child; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + ahash_request_set_tfm(desc, child); + ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req_async); - err = crypto_shash_init(desc); - - req->base.complete = rctx->complete; + rctx->out = req->result; + err = crypto_ahash_init(desc); out: local_bh_disable(); @@ -365,7 +365,8 @@ static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) if (unlikely(err == -EINPROGRESS)) goto out; - err = shash_ahash_mcryptd_update(req, &rctx->desc); + rctx->out = req->result; + err = ahash_mcryptd_update(&rctx->areq); if (err) { req->base.complete = rctx->complete; goto out; @@ -391,7 +392,8 @@ static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) if (unlikely(err == -EINPROGRESS)) goto out; - err = shash_ahash_mcryptd_final(req, &rctx->desc); + rctx->out = req->result; + err = ahash_mcryptd_final(&rctx->areq); if (err) { req->base.complete = rctx->complete; goto out; @@ -416,8 +418,8 @@ static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) if (unlikely(err == -EINPROGRESS)) goto out; - - err = shash_ahash_mcryptd_finup(req, &rctx->desc); + rctx->out = req->result; + err = ahash_mcryptd_finup(&rctx->areq); if (err) { req->base.complete = rctx->complete; @@ -439,25 +441,21 @@ static int mcryptd_hash_finup_enqueue(struct ahash_request *req) static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) { struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); - struct crypto_shash *child = ctx->child; + struct crypto_ahash *child = ctx->child; struct ahash_request *req = ahash_request_cast(req_async); struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - struct shash_desc *desc = &rctx->desc; + struct ahash_request *desc = &rctx->areq; if (unlikely(err == -EINPROGRESS)) goto out; - desc->tfm = child; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; /* check this again */ - - err = shash_ahash_mcryptd_digest(req, desc); + ahash_request_set_tfm(desc, child); + ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req_async); - if (err) { - req->base.complete = rctx->complete; - goto out; - } + rctx->out = req->result; + err = ahash_mcryptd_digest(desc); - return; out: local_bh_disable(); rctx->complete(&req->base, err); @@ -473,14 +471,14 @@ static int mcryptd_hash_export(struct ahash_request *req, void *out) { struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - return crypto_shash_export(&rctx->desc, out); + return crypto_ahash_export(&rctx->areq, out); } static int mcryptd_hash_import(struct ahash_request *req, const void *in) { struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - return crypto_shash_import(&rctx->desc, in); + return crypto_ahash_import(&rctx->areq, in); } static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, @@ -488,7 +486,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, { struct hashd_instance_ctx *ctx; struct ahash_instance *inst; - struct shash_alg *salg; + struct hash_alg_common *halg; struct crypto_alg *alg; u32 type = 0; u32 mask = 0; @@ -496,11 +494,11 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, mcryptd_check_internal(tb, &type, &mask); - salg = shash_attr_alg(tb[1], type, mask); - if (IS_ERR(salg)) - return PTR_ERR(salg); + halg = ahash_attr_alg(tb[1], type, mask); + if (IS_ERR(halg)) + return PTR_ERR(halg); - alg = &salg->base; + alg = &halg->base; pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), sizeof(*ctx)); @@ -511,7 +509,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, ctx = ahash_instance_ctx(inst); ctx->queue = queue; - err = crypto_init_shash_spawn(&ctx->spawn, salg, + err = crypto_init_ahash_spawn(&ctx->spawn, halg, ahash_crypto_instance(inst)); if (err) goto out_free_inst; @@ -521,8 +519,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, type |= CRYPTO_ALG_INTERNAL; inst->alg.halg.base.cra_flags = type; - inst->alg.halg.digestsize = salg->digestsize; - inst->alg.halg.statesize = salg->statesize; + inst->alg.halg.digestsize = halg->digestsize; + inst->alg.halg.statesize = halg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; @@ -539,7 +537,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, err = ahash_register_instance(tmpl, inst); if (err) { - crypto_drop_shash(&ctx->spawn); + crypto_drop_ahash(&ctx->spawn); out_free_inst: kfree(inst); } @@ -575,7 +573,7 @@ static void mcryptd_free(struct crypto_instance *inst) switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_AHASH: - crypto_drop_shash(&hctx->spawn); + crypto_drop_ahash(&hctx->spawn); kfree(ahash_instance(inst)); return; default: @@ -612,55 +610,38 @@ struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, } EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); -int shash_ahash_mcryptd_digest(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_digest(struct ahash_request *desc) { int err; - err = crypto_shash_init(desc) ?: - shash_ahash_mcryptd_finup(req, desc); + err = crypto_ahash_init(desc) ?: + ahash_mcryptd_finup(desc); return err; } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_digest); -int shash_ahash_mcryptd_update(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_update(struct ahash_request *desc) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - /* alignment is to be done by multi-buffer crypto algorithm if needed */ - return shash->update(desc, NULL, 0); + return crypto_ahash_update(desc); } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_update); -int shash_ahash_mcryptd_finup(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_finup(struct ahash_request *desc) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - /* alignment is to be done by multi-buffer crypto algorithm if needed */ - return shash->finup(desc, NULL, 0, req->result); + return crypto_ahash_finup(desc); } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_finup); -int shash_ahash_mcryptd_final(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_final(struct ahash_request *desc) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - /* alignment is to be done by multi-buffer crypto algorithm if needed */ - return shash->final(desc, req->result); + return crypto_ahash_final(desc); } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_final); -struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) +struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) { struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); @@ -668,12 +649,12 @@ struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(mcryptd_ahash_child); -struct shash_desc *mcryptd_shash_desc(struct ahash_request *req) +struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req) { struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - return &rctx->desc; + return &rctx->areq; } -EXPORT_SYMBOL_GPL(mcryptd_shash_desc); +EXPORT_SYMBOL_GPL(mcryptd_ahash_desc); void mcryptd_free_ahash(struct mcryptd_ahash *tfm) { @@ -681,7 +662,6 @@ void mcryptd_free_ahash(struct mcryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(mcryptd_free_ahash); - static int __init mcryptd_init(void) { int err, cpu; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 49dae16f8929d..1d4f365d8f03a 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -114,14 +114,10 @@ int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); -int shash_ahash_mcryptd_update(struct ahash_request *req, - struct shash_desc *desc); -int shash_ahash_mcryptd_final(struct ahash_request *req, - struct shash_desc *desc); -int shash_ahash_mcryptd_finup(struct ahash_request *req, - struct shash_desc *desc); -int shash_ahash_mcryptd_digest(struct ahash_request *req, - struct shash_desc *desc); +int ahash_mcryptd_update(struct ahash_request *desc); +int ahash_mcryptd_final(struct ahash_request *desc); +int ahash_mcryptd_finup(struct ahash_request *desc); +int ahash_mcryptd_digest(struct ahash_request *desc); int crypto_init_shash_ops_async(struct crypto_tfm *tfm); diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index c23ee1f7ee806..4a53c0d38cd2c 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -39,7 +39,7 @@ struct mcryptd_instance_ctx { }; struct mcryptd_hash_ctx { - struct crypto_shash *child; + struct crypto_ahash *child; struct mcryptd_alg_state *alg_state; }; @@ -59,13 +59,13 @@ struct mcryptd_hash_request_ctx { struct crypto_hash_walk walk; u8 *out; int flag; - struct shash_desc desc; + struct ahash_request areq; }; struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask); -struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); -struct shash_desc *mcryptd_shash_desc(struct ahash_request *req); +struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); +struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req); void mcryptd_free_ahash(struct mcryptd_ahash *tfm); void mcryptd_flusher(struct work_struct *work); From 4e5f2c400765e3a3ce512dc1ae890bac53401798 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Wed, 22 Jun 2016 17:49:13 +0100 Subject: [PATCH 077/180] crypto: kpp - Key-agreement Protocol Primitives API (KPP) Add key-agreement protocol primitives (kpp) API which allows to implement primitives required by protocols such as DH and ECDH. The API is composed mainly by the following functions * set_secret() - It allows the user to set his secret, also referred to as his private key, along with the parameters known to both parties involved in the key-agreement session. * generate_public_key() - It generates the public key to be sent to the other counterpart involved in the key-agreement session. The function has to be called after set_params() and set_secret() * generate_secret() - It generates the shared secret for the session Other functions such as init() and exit() are provided for allowing cryptographic hardware to be inizialized properly before use Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/Kconfig | 10 + crypto/Makefile | 1 + crypto/crypto_user.c | 20 ++ crypto/kpp.c | 123 ++++++++++++ include/crypto/internal/kpp.h | 64 +++++++ include/crypto/kpp.h | 328 ++++++++++++++++++++++++++++++++ include/linux/crypto.h | 1 + include/uapi/linux/cryptouser.h | 5 + 8 files changed, 552 insertions(+) create mode 100644 crypto/kpp.c create mode 100644 include/crypto/internal/kpp.h create mode 100644 include/crypto/kpp.h diff --git a/crypto/Kconfig b/crypto/Kconfig index 6881d1a5f8591..e72c4270173d5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -93,6 +93,15 @@ config CRYPTO_AKCIPHER select CRYPTO_AKCIPHER2 select CRYPTO_ALGAPI +config CRYPTO_KPP2 + tristate + select CRYPTO_ALGAPI2 + +config CRYPTO_KPP + tristate + select CRYPTO_ALGAPI + select CRYPTO_KPP2 + config CRYPTO_RSA tristate "RSA algorithm" select CRYPTO_AKCIPHER @@ -115,6 +124,7 @@ config CRYPTO_MANAGER2 select CRYPTO_HASH2 select CRYPTO_BLKCIPHER2 select CRYPTO_AKCIPHER2 + select CRYPTO_KPP2 config CRYPTO_USER tristate "Userspace cryptographic algorithm configuration" diff --git a/crypto/Makefile b/crypto/Makefile index 0b82c47537435..07b0f51bd6456 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -30,6 +30,7 @@ crypto_hash-y += shash.o obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o +obj-$(CONFIG_CRYPTO_KPP2) += kpp.o $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d577..d28513fb5a90b 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "internal.h" @@ -126,6 +127,21 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) return -EMSGSIZE; } +static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_kpp rkpp; + + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, + sizeof(struct crypto_report_kpp), &rkpp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { @@ -176,6 +192,10 @@ static int crypto_report_one(struct crypto_alg *alg, goto nla_put_failure; break; + case CRYPTO_ALG_TYPE_KPP: + if (crypto_report_kpp(skb, alg)) + goto nla_put_failure; + break; } out: diff --git a/crypto/kpp.c b/crypto/kpp.c new file mode 100644 index 0000000000000..d36ce05eee438 --- /dev/null +++ b/crypto/kpp.c @@ -0,0 +1,123 @@ +/* + * Key-agreement Protocol Primitives (KPP) + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#ifdef CONFIG_NET +static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_kpp rkpp; + + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, + sizeof(struct crypto_report_kpp), &rkpp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); + +static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_puts(m, "type : kpp\n"); +} + +static void crypto_kpp_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_kpp *kpp = __crypto_kpp_tfm(tfm); + struct kpp_alg *alg = crypto_kpp_alg(kpp); + + alg->exit(kpp); +} + +static int crypto_kpp_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_kpp *kpp = __crypto_kpp_tfm(tfm); + struct kpp_alg *alg = crypto_kpp_alg(kpp); + + if (alg->exit) + kpp->base.exit = crypto_kpp_exit_tfm; + + if (alg->init) + return alg->init(kpp); + + return 0; +} + +static const struct crypto_type crypto_kpp_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_kpp_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_kpp_show, +#endif + .report = crypto_kpp_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_KPP, + .tfmsize = offsetof(struct crypto_kpp, base), +}; + +struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_kpp_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_kpp); + +static void kpp_prepare_alg(struct kpp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + base->cra_type = &crypto_kpp_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_KPP; +} + +int crypto_register_kpp(struct kpp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + kpp_prepare_alg(alg); + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_kpp); + +void crypto_unregister_kpp(struct kpp_alg *alg) +{ + crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_kpp); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Key-agreement Protocol Primitives"); diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h new file mode 100644 index 0000000000000..ad3acf3649bec --- /dev/null +++ b/include/crypto/internal/kpp.h @@ -0,0 +1,64 @@ +/* + * Key-agreement Protocol Primitives (KPP) + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_KPP_INT_H +#define _CRYPTO_KPP_INT_H +#include +#include + +/* + * Transform internal helpers. + */ +static inline void *kpp_request_ctx(struct kpp_request *req) +{ + return req->__ctx; +} + +static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm) +{ + return tfm->base.__crt_ctx; +} + +static inline void kpp_request_complete(struct kpp_request *req, int err) +{ + req->base.complete(&req->base, err); +} + +static inline const char *kpp_alg_name(struct crypto_kpp *tfm) +{ + return crypto_kpp_tfm(tfm)->__crt_alg->cra_name; +} + +/** + * crypto_register_kpp() -- Register key-agreement protocol primitives algorithm + * + * Function registers an implementation of a key-agreement protocol primitive + * algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_kpp(struct kpp_alg *alg); + +/** + * crypto_unregister_kpp() -- Unregister key-agreement protocol primitive + * algorithm + * + * Function unregisters an implementation of a key-agreement protocol primitive + * algorithm + * + * @alg: algorithm definition + */ +void crypto_unregister_kpp(struct kpp_alg *alg); + +#endif diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h new file mode 100644 index 0000000000000..4fa897f3366bd --- /dev/null +++ b/include/crypto/kpp.h @@ -0,0 +1,328 @@ +/* + * Key-agreement Protocol Primitives (KPP) + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_KPP_ +#define _CRYPTO_KPP_ +#include + +/** + * struct kpp_request + * + * @base: Common attributes for async crypto requests + * @src: Source data + * @dst: Destination data + * @src_len: Size of the input buffer + * @dst_len: Size of the output buffer. It needs to be at least + * as big as the expected result depending on the operation + * After operation it will be updated with the actual size of the + * result. In case of error where the dst sgl size was insufficient, + * it will be updated to the size required for the operation. + * @__ctx: Start of private context data + */ +struct kpp_request { + struct crypto_async_request base; + struct scatterlist *src; + struct scatterlist *dst; + unsigned int src_len; + unsigned int dst_len; + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +/** + * struct crypto_kpp - user-instantiated object which encapsulate + * algorithms and core processing logic + * + * @base: Common crypto API algorithm data structure + */ +struct crypto_kpp { + struct crypto_tfm base; +}; + +/** + * struct kpp_alg - generic key-agreement protocol primitives + * + * @set_secret: Function invokes the protocol specific function to + * store the secret private key along with parameters. + * The implementation knows how to decode thie buffer + * @generate_public_key: Function generate the public key to be sent to the + * counterpart. In case of error, where output is not big + * enough req->dst_len will be updated to the size + * required + * @compute_shared_secret: Function compute the shared secret as defined by + * the algorithm. The result is given back to the user. + * In case of error, where output is not big enough, + * req->dst_len will be updated to the size required + * @max_size: Function returns the size of the output buffer + * @init: Initialize the object. This is called only once at + * instantiation time. In case the cryptographic hardware + * needs to be initialized. Software fallback should be + * put in place here. + * @exit: Undo everything @init did. + * + * @reqsize: Request context size required by algorithm + * implementation + * @base Common crypto API algorithm data structure + */ +struct kpp_alg { + int (*set_secret)(struct crypto_kpp *tfm, void *buffer, + unsigned int len); + int (*generate_public_key)(struct kpp_request *req); + int (*compute_shared_secret)(struct kpp_request *req); + + int (*max_size)(struct crypto_kpp *tfm); + + int (*init)(struct crypto_kpp *tfm); + void (*exit)(struct crypto_kpp *tfm); + + unsigned int reqsize; + struct crypto_alg base; +}; + +/** + * DOC: Generic Key-agreement Protocol Primitevs API + * + * The KPP API is used with the algorithm type + * CRYPTO_ALG_TYPE_KPP (listed as type "kpp" in /proc/crypto) + */ + +/** + * crypto_alloc_kpp() - allocate KPP tfm handle + * @alg_name: is the name of the kpp algorithm (e.g. "dh", "ecdh") + * @type: specifies the type of the algorithm + * @mask: specifies the mask for the algorithm + * + * Allocate a handle for kpp algorithm. The returned struct crypto_kpp + * is requeried for any following API invocation + * + * Return: allocated handle in case of success; IS_ERR() is true in case of + * an error, PTR_ERR() returns the error code. + */ +struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask); + +static inline struct crypto_tfm *crypto_kpp_tfm(struct crypto_kpp *tfm) +{ + return &tfm->base; +} + +static inline struct kpp_alg *__crypto_kpp_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct kpp_alg, base); +} + +static inline struct crypto_kpp *__crypto_kpp_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_kpp, base); +} + +static inline struct kpp_alg *crypto_kpp_alg(struct crypto_kpp *tfm) +{ + return __crypto_kpp_alg(crypto_kpp_tfm(tfm)->__crt_alg); +} + +static inline unsigned int crypto_kpp_reqsize(struct crypto_kpp *tfm) +{ + return crypto_kpp_alg(tfm)->reqsize; +} + +static inline void kpp_request_set_tfm(struct kpp_request *req, + struct crypto_kpp *tfm) +{ + req->base.tfm = crypto_kpp_tfm(tfm); +} + +static inline struct crypto_kpp *crypto_kpp_reqtfm(struct kpp_request *req) +{ + return __crypto_kpp_tfm(req->base.tfm); +} + +/** + * crypto_free_kpp() - free KPP tfm handle + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() + */ +static inline void crypto_free_kpp(struct crypto_kpp *tfm) +{ + crypto_destroy_tfm(tfm, crypto_kpp_tfm(tfm)); +} + +/** + * kpp_request_alloc() - allocates kpp request + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() + * @gfp: allocation flags + * + * Return: allocated handle in case of success or NULL in case of an error. + */ +static inline struct kpp_request *kpp_request_alloc(struct crypto_kpp *tfm, + gfp_t gfp) +{ + struct kpp_request *req; + + req = kmalloc(sizeof(*req) + crypto_kpp_reqsize(tfm), gfp); + if (likely(req)) + kpp_request_set_tfm(req, tfm); + + return req; +} + +/** + * kpp_request_free() - zeroize and free kpp request + * + * @req: request to free + */ +static inline void kpp_request_free(struct kpp_request *req) +{ + kzfree(req); +} + +/** + * kpp_request_set_callback() - Sets an asynchronous callback. + * + * Callback will be called when an asynchronous operation on a given + * request is finished. + * + * @req: request that the callback will be set for + * @flgs: specify for instance if the operation may backlog + * @cmpl: callback which will be called + * @data: private data used by the caller + */ +static inline void kpp_request_set_callback(struct kpp_request *req, + u32 flgs, + crypto_completion_t cmpl, + void *data) +{ + req->base.complete = cmpl; + req->base.data = data; + req->base.flags = flgs; +} + +/** + * kpp_request_set_input() - Sets input buffer + * + * Sets parameters required by generate_public_key + * + * @req: kpp request + * @input: ptr to input scatter list + * @input_len: size of the input scatter list + */ +static inline void kpp_request_set_input(struct kpp_request *req, + struct scatterlist *input, + unsigned int input_len) +{ + req->src = input; + req->src_len = input_len; +} + +/** + * kpp_request_set_output() - Sets output buffer + * + * Sets parameters required by kpp operation + * + * @req: kpp request + * @output: ptr to output scatter list + * @output_len: size of the output scatter list + */ +static inline void kpp_request_set_output(struct kpp_request *req, + struct scatterlist *output, + unsigned int output_len) +{ + req->dst = output; + req->dst_len = output_len; +} + +enum { + CRYPTO_KPP_SECRET_TYPE_UNKNOWN, +}; + +/** + * struct kpp_secret - small header for packing secret buffer + * + * @type: define type of secret. Each kpp type will define its own + * @len: specify the len of the secret, include the header, that + * follows the struct + */ +struct kpp_secret { + unsigned short type; + unsigned short len; +}; + +/** + * crypto_kpp_set_secret() - Invoke kpp operation + * + * Function invokes the specific kpp operation for a given alg. + * + * @tfm: tfm handle + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, void *buffer, + unsigned int len) +{ + struct kpp_alg *alg = crypto_kpp_alg(tfm); + + return alg->set_secret(tfm, buffer, len); +} + +/** + * crypto_kpp_generate_public_key() - Invoke kpp operation + * + * Function invokes the specific kpp operation for generating the public part + * for a given kpp algorithm + * + * @req: kpp key request + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_kpp_generate_public_key(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct kpp_alg *alg = crypto_kpp_alg(tfm); + + return alg->generate_public_key(req); +} + +/** + * crypto_kpp_compute_shared_secret() - Invoke kpp operation + * + * Function invokes the specific kpp operation for computing the shared secret + * for a given kpp algorithm. + * + * @req: kpp key request + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct kpp_alg *alg = crypto_kpp_alg(tfm); + + return alg->compute_shared_secret(req); +} + +/** + * crypto_kpp_maxsize() - Get len for output buffer + * + * Function returns the output buffer size required + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() + * + * Return: minimum len for output buffer or error code if key hasn't been set + */ +static inline int crypto_kpp_maxsize(struct crypto_kpp *tfm) +{ + struct kpp_alg *alg = crypto_kpp_alg(tfm); + + return alg->max_size(tfm); +} + +#endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index d844cbc365f72..992cfc2e5df1d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -48,6 +48,7 @@ #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 +#define CRYPTO_ALG_TYPE_KPP 0x00000008 #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d #define CRYPTO_ALG_TYPE_DIGEST 0x0000000e diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 2e67bb64c1da5..79b5ded2001a3 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -45,6 +45,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_REPORT_RNG, /* struct crypto_report_rng */ CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher */ CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ + CRYPTOCFGA_REPORT_KPP, /* struct crypto_report_kpp */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -107,5 +108,9 @@ struct crypto_report_akcipher { char type[CRYPTO_MAX_NAME]; }; +struct crypto_report_kpp { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) From 802c7f1c84e4b5a6ac78635878041023fc5831b1 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Wed, 22 Jun 2016 17:49:14 +0100 Subject: [PATCH 078/180] crypto: dh - Add DH software implementation * Implement MPI based Diffie-Hellman under kpp API * Test provided uses data generad by OpenSSL Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/Kconfig | 8 ++ crypto/Makefile | 4 + crypto/dh.c | 189 +++++++++++++++++++++++++++++++++++ crypto/dh_helper.c | 95 ++++++++++++++++++ crypto/testmgr.c | 144 +++++++++++++++++++++++++++ crypto/testmgr.h | 230 +++++++++++++++++++++++++++++++++++++++++++ include/crypto/dh.h | 29 ++++++ include/crypto/kpp.h | 1 + 8 files changed, 700 insertions(+) create mode 100644 crypto/dh.c create mode 100644 crypto/dh_helper.c create mode 100644 include/crypto/dh.h diff --git a/crypto/Kconfig b/crypto/Kconfig index e72c4270173d5..162d2f9aa2425 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -111,6 +111,14 @@ config CRYPTO_RSA help Generic implementation of the RSA public key algorithm. +config CRYPTO_DH + tristate "Diffie-Hellman algorithm" + select CRYPTO_KPP + select MPILIB + help + Generic implementation of the Diffie-Hellman algorithm. + + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_MANAGER2 diff --git a/crypto/Makefile b/crypto/Makefile index 07b0f51bd6456..82897208e8e03 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -32,6 +32,10 @@ obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o obj-$(CONFIG_CRYPTO_KPP2) += kpp.o +dh_generic-y := dh.o +dh_generic-y += dh_helper.o +obj-$(CONFIG_CRYPTO_DH) += dh_generic.o + $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h clean-files += rsapubkey-asn1.c rsapubkey-asn1.h diff --git a/crypto/dh.c b/crypto/dh.c new file mode 100644 index 0000000000000..5e960fe28681d --- /dev/null +++ b/crypto/dh.c @@ -0,0 +1,189 @@ +/* Diffie-Hellman Key Agreement Method [RFC2631] + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +struct dh_ctx { + MPI p; + MPI g; + MPI xa; +}; + +static inline void dh_clear_params(struct dh_ctx *ctx) +{ + mpi_free(ctx->p); + mpi_free(ctx->g); + ctx->p = NULL; + ctx->g = NULL; +} + +static void dh_free_ctx(struct dh_ctx *ctx) +{ + dh_clear_params(ctx); + mpi_free(ctx->xa); + ctx->xa = NULL; +} + +/* + * If base is g we compute the public key + * ya = g^xa mod p; [RFC2631 sec 2.1.1] + * else if base if the counterpart public key we compute the shared secret + * ZZ = yb^xa mod p; [RFC2631 sec 2.1.1] + */ +static int _compute_val(const struct dh_ctx *ctx, MPI base, MPI val) +{ + /* val = base^xa mod p */ + return mpi_powm(val, base, ctx->xa, ctx->p); +} + +static inline struct dh_ctx *dh_get_ctx(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int dh_check_params_length(unsigned int p_len) +{ + return (p_len < 1536) ? -EINVAL : 0; +} + +static int dh_set_params(struct dh_ctx *ctx, struct dh *params) +{ + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p = mpi_read_raw_data(params->p, params->p_size); + if (!ctx->p) + return -EINVAL; + + ctx->g = mpi_read_raw_data(params->g, params->g_size); + if (!ctx->g) { + mpi_free(ctx->p); + return -EINVAL; + } + + return 0; +} + +static int dh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) +{ + struct dh_ctx *ctx = dh_get_ctx(tfm); + struct dh params; + + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + if (dh_set_params(ctx, ¶ms) < 0) + return -EINVAL; + + ctx->xa = mpi_read_raw_data(params.key, params.key_size); + if (!ctx->xa) { + dh_clear_params(ctx); + return -EINVAL; + } + + return 0; +} + +static int dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct dh_ctx *ctx = dh_get_ctx(tfm); + MPI base, val = mpi_alloc(0); + int ret = 0; + int sign; + + if (!val) + return -ENOMEM; + + if (unlikely(!ctx->xa)) { + ret = -EINVAL; + goto err_free_val; + } + + if (req->src) { + base = mpi_read_raw_from_sgl(req->src, req->src_len); + if (!base) { + ret = EINVAL; + goto err_free_val; + } + } else { + base = ctx->g; + } + + ret = _compute_val(ctx, base, val); + if (ret) + goto err_free_base; + + ret = mpi_write_to_sgl(val, req->dst, &req->dst_len, &sign); + if (ret) + goto err_free_base; + + if (sign < 0) + ret = -EBADMSG; +err_free_base: + if (req->src) + mpi_free(base); +err_free_val: + mpi_free(val); + return ret; +} + +static int dh_max_size(struct crypto_kpp *tfm) +{ + struct dh_ctx *ctx = dh_get_ctx(tfm); + + return mpi_get_size(ctx->p); +} + +static void dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct dh_ctx *ctx = dh_get_ctx(tfm); + + dh_free_ctx(ctx); +} + +static struct kpp_alg dh = { + .set_secret = dh_set_secret, + .generate_public_key = dh_compute_value, + .compute_shared_secret = dh_compute_value, + .max_size = dh_max_size, + .exit = dh_exit_tfm, + .base = { + .cra_name = "dh", + .cra_driver_name = "dh-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct dh_ctx), + }, +}; + +static int dh_init(void) +{ + return crypto_register_kpp(&dh); +} + +static void dh_exit(void) +{ + crypto_unregister_kpp(&dh); +} + +module_init(dh_init); +module_exit(dh_exit); +MODULE_ALIAS_CRYPTO("dh"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DH generic algorithm"); diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c new file mode 100644 index 0000000000000..02db76b20d003 --- /dev/null +++ b/crypto/dh_helper.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 3 * sizeof(int)) + +static inline u8 *dh_pack_data(void *dst, const void *src, size_t size) +{ + memcpy(dst, src, size); + return dst + size; +} + +static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size) +{ + memcpy(dst, src, size); + return src + size; +} + +static inline int dh_data_size(const struct dh *p) +{ + return p->key_size + p->p_size + p->g_size; +} + +int crypto_dh_key_len(const struct dh *p) +{ + return DH_KPP_SECRET_MIN_SIZE + dh_data_size(p); +} +EXPORT_SYMBOL_GPL(crypto_dh_key_len); + +int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params) +{ + u8 *ptr = buf; + struct kpp_secret secret = { + .type = CRYPTO_KPP_SECRET_TYPE_DH, + .len = len + }; + + if (unlikely(!buf)) + return -EINVAL; + + if (len != crypto_dh_key_len(params)) + return -EINVAL; + + ptr = dh_pack_data(ptr, &secret, sizeof(secret)); + ptr = dh_pack_data(ptr, ¶ms->key_size, sizeof(params->key_size)); + ptr = dh_pack_data(ptr, ¶ms->p_size, sizeof(params->p_size)); + ptr = dh_pack_data(ptr, ¶ms->g_size, sizeof(params->g_size)); + ptr = dh_pack_data(ptr, params->key, params->key_size); + ptr = dh_pack_data(ptr, params->p, params->p_size); + dh_pack_data(ptr, params->g, params->g_size); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_dh_encode_key); + +int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) +{ + const u8 *ptr = buf; + struct kpp_secret secret; + + if (unlikely(!buf || len < DH_KPP_SECRET_MIN_SIZE)) + return -EINVAL; + + ptr = dh_unpack_data(&secret, ptr, sizeof(secret)); + if (secret.type != CRYPTO_KPP_SECRET_TYPE_DH) + return -EINVAL; + + ptr = dh_unpack_data(¶ms->key_size, ptr, sizeof(params->key_size)); + ptr = dh_unpack_data(¶ms->p_size, ptr, sizeof(params->p_size)); + ptr = dh_unpack_data(¶ms->g_size, ptr, sizeof(params->g_size)); + if (secret.len != crypto_dh_key_len(params)) + return -EINVAL; + + /* Don't allocate memory. Set pointers to data within + * the given buffer + */ + params->key = (void *)ptr; + params->p = (void *)(ptr + params->key_size); + params->g = (void *)(ptr + params->key_size + params->p_size); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_dh_decode_key); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index b773a563809bd..ff79eb887fd0b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" @@ -120,6 +121,11 @@ struct akcipher_test_suite { unsigned int count; }; +struct kpp_test_suite { + struct kpp_testvec *vecs; + unsigned int count; +}; + struct alg_test_desc { const char *alg; int (*test)(const struct alg_test_desc *desc, const char *driver, @@ -134,6 +140,7 @@ struct alg_test_desc { struct cprng_test_suite cprng; struct drbg_test_suite drbg; struct akcipher_test_suite akcipher; + struct kpp_test_suite kpp; } suite; }; @@ -1777,6 +1784,133 @@ static int alg_test_drbg(const struct alg_test_desc *desc, const char *driver, } +static int do_test_kpp(struct crypto_kpp *tfm, struct kpp_testvec *vec, + const char *alg) +{ + struct kpp_request *req; + void *input_buf = NULL; + void *output_buf = NULL; + struct tcrypt_result result; + unsigned int out_len_max; + int err = -ENOMEM; + struct scatterlist src, dst; + + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) + return err; + + init_completion(&result.completion); + + err = crypto_kpp_set_secret(tfm, vec->secret, vec->secret_size); + if (err < 0) + goto free_req; + + out_len_max = crypto_kpp_maxsize(tfm); + output_buf = kzalloc(out_len_max, GFP_KERNEL); + if (!output_buf) { + err = -ENOMEM; + goto free_req; + } + + /* Use appropriate parameter as base */ + kpp_request_set_input(req, NULL, 0); + sg_init_one(&dst, output_buf, out_len_max); + kpp_request_set_output(req, &dst, out_len_max); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &result); + + /* Compute public key */ + err = wait_async_op(&result, crypto_kpp_generate_public_key(req)); + if (err) { + pr_err("alg: %s: generate public key test failed. err %d\n", + alg, err); + goto free_output; + } + /* Verify calculated public key */ + if (memcmp(vec->expected_a_public, sg_virt(req->dst), + vec->expected_a_public_size)) { + pr_err("alg: %s: generate public key test failed. Invalid output\n", + alg); + err = -EINVAL; + goto free_output; + } + + /* Calculate shared secret key by using counter part (b) public key. */ + input_buf = kzalloc(vec->b_public_size, GFP_KERNEL); + if (!input_buf) { + err = -ENOMEM; + goto free_output; + } + + memcpy(input_buf, vec->b_public, vec->b_public_size); + sg_init_one(&src, input_buf, vec->b_public_size); + sg_init_one(&dst, output_buf, out_len_max); + kpp_request_set_input(req, &src, vec->b_public_size); + kpp_request_set_output(req, &dst, out_len_max); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &result); + err = wait_async_op(&result, crypto_kpp_compute_shared_secret(req)); + if (err) { + pr_err("alg: %s: compute shard secret test failed. err %d\n", + alg, err); + goto free_all; + } + /* + * verify shared secret from which the user will derive + * secret key by executing whatever hash it has chosen + */ + if (memcmp(vec->expected_ss, sg_virt(req->dst), + vec->expected_ss_size)) { + pr_err("alg: %s: compute shared secret test failed. Invalid output\n", + alg); + err = -EINVAL; + } + +free_all: + kfree(input_buf); +free_output: + kfree(output_buf); +free_req: + kpp_request_free(req); + return err; +} + +static int test_kpp(struct crypto_kpp *tfm, const char *alg, + struct kpp_testvec *vecs, unsigned int tcount) +{ + int ret, i; + + for (i = 0; i < tcount; i++) { + ret = do_test_kpp(tfm, vecs++, alg); + if (ret) { + pr_err("alg: %s: test failed on vector %d, err=%d\n", + alg, i + 1, ret); + return ret; + } + } + return 0; +} + +static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver, + u32 type, u32 mask) +{ + struct crypto_kpp *tfm; + int err = 0; + + tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask); + if (IS_ERR(tfm)) { + pr_err("alg: kpp: Failed to load tfm for %s: %ld\n", + driver, PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + if (desc->suite.kpp.vecs) + err = test_kpp(tfm, desc->alg, desc->suite.kpp.vecs, + desc->suite.kpp.count); + + crypto_free_kpp(tfm); + return err; +} + static int do_test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs) { @@ -2728,6 +2862,16 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "dh", + .test = alg_test_kpp, + .fips_allowed = 1, + .suite = { + .kpp = { + .vecs = dh_tv_template, + .count = DH_TEST_VECTORS + } + } }, { .alg = "digest_null", .test = alg_test_null, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index b70e3c92bedd8..78e874eca031d 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -133,6 +133,17 @@ struct akcipher_testvec { bool public_key_vec; }; +struct kpp_testvec { + unsigned char *secret; + unsigned char *b_public; + unsigned char *expected_a_public; + unsigned char *expected_ss; + unsigned short secret_size; + unsigned short b_public_size; + unsigned short expected_a_public_size; + unsigned short expected_ss_size; +}; + static char zeroed_string[48]; /* @@ -330,6 +341,225 @@ static struct akcipher_testvec rsa_tv_template[] = { } }; +#define DH_TEST_VECTORS 2 + +struct kpp_testvec dh_tv_template[] = { + { + .secret = +#ifdef __LITTLE_ENDIAN + "\x01\x00" /* type */ + "\x11\x02" /* len */ + "\x00\x01\x00\x00" /* key_size */ + "\x00\x01\x00\x00" /* p_size */ + "\x01\x00\x00\x00" /* g_size */ +#else + "\x00\x01" /* type */ + "\x02\x11" /* len */ + "\x00\x00\x01\x00" /* key_size */ + "\x00\x00\x01\x00" /* p_size */ + "\x00\x00\x00\x01" /* g_size */ +#endif + /* xa */ + "\x44\xc1\x48\x36\xa7\x2b\x6f\x4e\x43\x03\x68\xad\x31\x00\xda\xf3" + "\x2a\x01\xa8\x32\x63\x5f\x89\x32\x1f\xdf\x4c\xa1\x6a\xbc\x10\x15" + "\x90\x35\xc9\x26\x41\xdf\x7b\xaa\x56\x56\x3d\x85\x44\xb5\xc0\x8e" + "\x37\x83\x06\x50\xb3\x5f\x0e\x28\x2c\xd5\x46\x15\xe3\xda\x7d\x74" + "\x87\x13\x91\x4f\xd4\x2d\xf6\xc7\x5e\x14\x2c\x11\xc2\x26\xb4\x3a" + "\xe3\xb2\x36\x20\x11\x3b\x22\xf2\x06\x65\x66\xe2\x57\x58\xf8\x22" + "\x1a\x94\xbd\x2b\x0e\x8c\x55\xad\x61\x23\x45\x2b\x19\x1e\x63\x3a" + "\x13\x61\xe3\xa0\x79\x70\x3e\x6d\x98\x32\xbc\x7f\x82\xc3\x11\xd8" + "\xeb\x53\xb5\xfc\xb5\xd5\x3c\x4a\xea\x92\x3e\x01\xce\x15\x65\xd4" + "\xaa\x85\xc1\x11\x90\x83\x31\x6e\xfe\xe7\x7f\x7d\xed\xab\xf9\x29" + "\xf8\xc7\xf1\x68\xc6\xb7\xe4\x1f\x2f\x28\xa0\xc9\x1a\x50\x64\x29" + "\x4b\x01\x6d\x1a\xda\x46\x63\x21\x07\x40\x8c\x8e\x4c\x6f\xb5\xe5" + "\x12\xf3\xc2\x1b\x48\x27\x5e\x27\x01\xb1\xaa\xed\x68\x9b\x83\x18" + "\x8f\xb1\xeb\x1f\x04\xd1\x3c\x79\xed\x4b\xf7\x0a\x33\xdc\xe0\xc6" + "\xd8\x02\x51\x59\x00\x74\x30\x07\x4c\x2d\xac\xe4\x13\xf1\x80\xf0" + "\xce\xfa\xff\xa9\xce\x29\x46\xdd\x9d\xad\xd1\xc3\xc6\x58\x1a\x63" + /* p */ + "\xb9\x36\x3a\xf1\x82\x1f\x60\xd3\x22\x47\xb8\xbc\x2d\x22\x6b\x81" + "\x7f\xe8\x20\x06\x09\x23\x73\x49\x9a\x59\x8b\x35\x25\xf8\x31\xbc" + "\x7d\xa8\x1c\x9d\x56\x0d\x1a\xf7\x4b\x4f\x96\xa4\x35\x77\x6a\x89" + "\xab\x42\x00\x49\x21\x71\xed\x28\x16\x1d\x87\x5a\x10\xa7\x9c\x64" + "\x94\xd4\x87\x3d\x28\xef\x44\xfe\x4b\xe2\xb4\x15\x8c\x82\xa6\xf3" + "\x50\x5f\xa8\xe8\xa2\x60\xe7\x00\x86\x78\x05\xd4\x78\x19\xa1\x98" + "\x62\x4e\x4a\x00\x78\x56\x96\xe6\xcf\xd7\x10\x1b\x74\x5d\xd0\x26" + "\x61\xdb\x6b\x32\x09\x51\xd8\xa5\xfd\x54\x16\x71\x01\xb3\x39\xe6" + "\x4e\x69\xb1\xd7\x06\x8f\xd6\x1e\xdc\x72\x25\x26\x74\xc8\x41\x06" + "\x5c\xd1\x26\x5c\xb0\x2f\xf9\x59\x13\xc1\x2a\x0f\x78\xea\x7b\xf7" + "\xbd\x59\xa0\x90\x1d\xfc\x33\x5b\x4c\xbf\x05\x9c\x3a\x3f\x69\xa2" + "\x45\x61\x4e\x10\x6a\xb3\x17\xc5\x68\x30\xfb\x07\x5f\x34\xc6\xfb" + "\x73\x07\x3c\x70\xf6\xae\xe7\x72\x84\xc3\x18\x81\x8f\xe8\x11\x1f" + "\x3d\x83\x83\x01\x2a\x14\x73\xbf\x32\x32\x2e\xc9\x4d\xdb\x2a\xca" + "\xee\x71\xf9\xda\xad\xe8\x82\x0b\x4d\x0c\x1f\xb6\x1d\xef\x00\x67" + "\x74\x3d\x95\xe0\xb7\xc4\x30\x8a\x24\x87\x12\x47\x27\x70\x0d\x73" + /* g */ + "\x02", + .b_public = + "\x2a\x67\x5c\xfd\x63\x5d\xc0\x97\x0a\x8b\xa2\x1f\xf8\x8a\xcb\x54" + "\xca\x2f\xd3\x49\x3f\x01\x8e\x87\xfe\xcc\x94\xa0\x3e\xd4\x26\x79" + "\x9a\x94\x3c\x11\x81\x58\x5c\x60\x3d\xf5\x98\x90\x89\x64\x62\x1f" + "\xbd\x05\x6d\x2b\xcd\x84\x40\x9b\x4a\x1f\xe0\x19\xf1\xca\x20\xb3" + "\x4e\xa0\x4f\x15\xcc\xa5\xfe\xa5\xb4\xf5\x0b\x18\x7a\x5a\x37\xaa" + "\x58\x00\x19\x7f\xe2\xa3\xd9\x1c\x44\x57\xcc\xde\x2e\xc1\x38\xea" + "\xeb\xe3\x90\x40\xc4\x6c\xf7\xcd\xe9\x22\x50\x71\xf5\x7c\xdb\x37" + "\x0e\x80\xc3\xed\x7e\xb1\x2b\x2f\xbe\x71\xa6\x11\xa5\x9d\xf5\x39" + "\xf1\xa2\xe5\x85\xbc\x25\x91\x4e\x84\x8d\x26\x9f\x4f\xe6\x0f\xa6" + "\x2b\x6b\xf9\x0d\xaf\x6f\xbb\xfa\x2d\x79\x15\x31\x57\xae\x19\x60" + "\x22\x0a\xf5\xfd\x98\x0e\xbf\x5d\x49\x75\x58\x37\xbc\x7f\xf5\x21" + "\x56\x1e\xd5\xb3\x50\x0b\xca\x96\xf3\xd1\x3f\xb3\x70\xa8\x6d\x63" + "\x48\xfb\x3d\xd7\x29\x91\x45\xb5\x48\xcd\xb6\x78\x30\xf2\x3f\x1e" + "\xd6\x22\xd6\x35\x9b\xf9\x1f\x85\xae\xab\x4b\xd7\xe0\xc7\x86\x67" + "\x3f\x05\x7f\xa6\x0d\x2f\x0d\xbf\x53\x5f\x4d\x2c\x6d\x5e\x57\x40" + "\x30\x3a\x23\x98\xf9\xb4\x32\xf5\x32\x83\xdd\x0b\xae\x33\x97\x2f", + .expected_a_public = + "\x5c\x24\xdf\xeb\x5b\x4b\xf8\xc5\xef\x39\x48\x82\xe0\x1e\x62\xee" + "\x8a\xae\xdf\x93\x6c\x2b\x16\x95\x92\x16\x3f\x16\x7b\x75\x03\x85" + "\xd9\xf1\x69\xc2\x14\x87\x45\xfc\xa4\x19\xf6\xf0\xa4\xf3\xec\xd4" + "\x6c\x5c\x03\x3b\x94\xc2\x2f\x92\xe4\xce\xb3\xe4\x72\xe8\x17\xe6" + "\x23\x7e\x00\x01\x09\x59\x13\xbf\xc1\x2f\x99\xa9\x07\xaa\x02\x23" + "\x4a\xca\x39\x4f\xbc\xec\x0f\x27\x4f\x19\x93\x6c\xb9\x30\x52\xfd" + "\x2b\x9d\x86\xf1\x06\x1e\xb6\x56\x27\x4a\xc9\x8a\xa7\x8a\x48\x5e" + "\xb5\x60\xcb\xdf\xff\x03\x26\x10\xbf\x90\x8f\x46\x60\xeb\x9b\x9a" + "\xd6\x6f\x44\x91\x03\x92\x18\x2c\x96\x5e\x40\x19\xfb\xf4\x4f\x3a" + "\x02\x7b\xaf\xcc\x22\x20\x79\xb9\xf8\x9f\x8f\x85\x6b\xec\x44\xbb" + "\xe6\xa8\x8e\xb1\xe8\x2c\xee\x64\xee\xf8\xbd\x00\xf3\xe2\x2b\x93" + "\xcd\xe7\xc4\xdf\xc9\x19\x46\xfe\xb6\x07\x73\xc1\x8a\x64\x79\x26" + "\xe7\x30\xad\x2a\xdf\xe6\x8f\x59\xf5\x81\xbf\x4a\x29\x91\xe7\xb7" + "\xcf\x48\x13\x27\x75\x79\x40\xd9\xd6\x32\x52\x4e\x6a\x86\xae\x6f" + "\xc2\xbf\xec\x1f\xc2\x69\xb2\xb6\x59\xe5\xa5\x17\xa4\x77\xb7\x62" + "\x46\xde\xe8\xd2\x89\x78\x9a\xef\xa3\xb5\x8f\x26\xec\x80\xda\x39", + .expected_ss = + "\x8f\xf3\xac\xa2\xea\x22\x11\x5c\x45\x65\x1a\x77\x75\x2e\xcf\x46" + "\x23\x14\x1e\x67\x53\x4d\x35\xb0\x38\x1d\x4e\xb9\x41\x9a\x21\x24" + "\x6e\x9f\x40\xfe\x90\x51\xb1\x06\xa4\x7b\x87\x17\x2f\xe7\x5e\x22" + "\xf0\x7b\x54\x84\x0a\xac\x0a\x90\xd2\xd7\xe8\x7f\xe7\xe3\x30\x75" + "\x01\x1f\x24\x75\x56\xbe\xcc\x8d\x1e\x68\x0c\x41\x72\xd3\xfa\xbb" + "\xe5\x9c\x60\xc7\x28\x77\x0c\xbe\x89\xab\x08\xd6\x21\xe7\x2e\x1a" + "\x58\x7a\xca\x4f\x22\xf3\x2b\x30\xfd\xf4\x98\xc1\xa3\xf8\xf6\xcc" + "\xa9\xe4\xdb\x5b\xee\xd5\x5c\x6f\x62\x4c\xd1\x1a\x02\x2a\x23\xe4" + "\xb5\x57\xf3\xf9\xec\x04\x83\x54\xfe\x08\x5e\x35\xac\xfb\xa8\x09" + "\x82\x32\x60\x11\xb2\x16\x62\x6b\xdf\xda\xde\x9c\xcb\x63\x44\x6c" + "\x59\x26\x6a\x8f\xb0\x24\xcb\xa6\x72\x48\x1e\xeb\xe0\xe1\x09\x44" + "\xdd\xee\x66\x6d\x84\xcf\xa5\xc1\xb8\x36\x74\xd3\x15\x96\xc3\xe4" + "\xc6\x5a\x4d\x23\x97\x0c\x5c\xcb\xa9\xf5\x29\xc2\x0e\xff\x93\x82" + "\xd3\x34\x49\xad\x64\xa6\xb1\xc0\x59\x28\x75\x60\xa7\x8a\xb0\x11" + "\x56\x89\x42\x74\x11\xf5\xf6\x5e\x6f\x16\x54\x6a\xb1\x76\x4d\x50" + "\x8a\x68\xc1\x5b\x82\xb9\x0d\x00\x32\x50\xed\x88\x87\x48\x92\x17", + .secret_size = 529, + .b_public_size = 256, + .expected_a_public_size = 256, + .expected_ss_size = 256, + }, + { + .secret = +#ifdef __LITTLE_ENDIAN + "\x01\x00" /* type */ + "\x11\x02" /* len */ + "\x00\x01\x00\x00" /* key_size */ + "\x00\x01\x00\x00" /* p_size */ + "\x01\x00\x00\x00" /* g_size */ +#else + "\x00\x01" /* type */ + "\x02\x11" /* len */ + "\x00\x00\x01\x00" /* key_size */ + "\x00\x00\x01\x00" /* p_size */ + "\x00\x00\x00\x01" /* g_size */ +#endif + /* xa */ + "\x4d\x75\xa8\x6e\xba\x23\x3a\x0c\x63\x56\xc8\xc9\x5a\xa7\xd6\x0e" + "\xed\xae\x40\x78\x87\x47\x5f\xe0\xa7\x7b\xba\x84\x88\x67\x4e\xe5" + "\x3c\xcc\x5c\x6a\xe7\x4a\x20\xec\xbe\xcb\xf5\x52\x62\x9f\x37\x80" + "\x0c\x72\x7b\x83\x66\xa4\xf6\x7f\x95\x97\x1c\x6a\x5c\x7e\xf1\x67" + "\x37\xb3\x93\x39\x3d\x0b\x55\x35\xd9\xe5\x22\x04\x9f\xf8\xc1\x04" + "\xce\x13\xa5\xac\xe1\x75\x05\xd1\x2b\x53\xa2\x84\xef\xb1\x18\xf4" + "\x66\xdd\xea\xe6\x24\x69\x5a\x49\xe0\x7a\xd8\xdf\x1b\xb7\xf1\x6d" + "\x9b\x50\x2c\xc8\x1c\x1c\xa3\xb4\x37\xfb\x66\x3f\x67\x71\x73\xa9" + "\xff\x5f\xd9\xa2\x25\x6e\x25\x1b\x26\x54\xbf\x0c\xc6\xdb\xea\x0a" + "\x52\x6c\x16\x7c\x27\x68\x15\x71\x58\x73\x9d\xe6\xc2\x80\xaa\x97" + "\x31\x66\xfb\xa6\xfb\xfd\xd0\x9c\x1d\xbe\x81\x48\xf5\x9a\x32\xf1" + "\x69\x62\x18\x78\xae\x72\x36\xe6\x94\x27\xd1\xff\x18\x4f\x28\x6a" + "\x16\xbd\x6a\x60\xee\xe5\xf9\x6d\x16\xe4\xb8\xa6\x41\x9b\x23\x7e" + "\xf7\x9d\xd1\x1d\x03\x15\x66\x3a\xcf\xb6\x2c\x13\x96\x2c\x52\x21" + "\xe4\x2d\x48\x7a\x8a\x5d\xb2\x88\xed\x98\x61\x79\x8b\x6a\x1e\x5f" + "\xd0\x8a\x2d\x99\x5a\x2b\x0f\xbc\xef\x53\x8f\x32\xc1\xa2\x99\x26" + /* p */ + "\xb9\x36\x3a\xf1\x82\x1f\x60\xd3\x22\x47\xb8\xbc\x2d\x22\x6b\x81" + "\x7f\xe8\x20\x06\x09\x23\x73\x49\x9a\x59\x8b\x35\x25\xf8\x31\xbc" + "\x7d\xa8\x1c\x9d\x56\x0d\x1a\xf7\x4b\x4f\x96\xa4\x35\x77\x6a\x89" + "\xab\x42\x00\x49\x21\x71\xed\x28\x16\x1d\x87\x5a\x10\xa7\x9c\x64" + "\x94\xd4\x87\x3d\x28\xef\x44\xfe\x4b\xe2\xb4\x15\x8c\x82\xa6\xf3" + "\x50\x5f\xa8\xe8\xa2\x60\xe7\x00\x86\x78\x05\xd4\x78\x19\xa1\x98" + "\x62\x4e\x4a\x00\x78\x56\x96\xe6\xcf\xd7\x10\x1b\x74\x5d\xd0\x26" + "\x61\xdb\x6b\x32\x09\x51\xd8\xa5\xfd\x54\x16\x71\x01\xb3\x39\xe6" + "\x4e\x69\xb1\xd7\x06\x8f\xd6\x1e\xdc\x72\x25\x26\x74\xc8\x41\x06" + "\x5c\xd1\x26\x5c\xb0\x2f\xf9\x59\x13\xc1\x2a\x0f\x78\xea\x7b\xf7" + "\xbd\x59\xa0\x90\x1d\xfc\x33\x5b\x4c\xbf\x05\x9c\x3a\x3f\x69\xa2" + "\x45\x61\x4e\x10\x6a\xb3\x17\xc5\x68\x30\xfb\x07\x5f\x34\xc6\xfb" + "\x73\x07\x3c\x70\xf6\xae\xe7\x72\x84\xc3\x18\x81\x8f\xe8\x11\x1f" + "\x3d\x83\x83\x01\x2a\x14\x73\xbf\x32\x32\x2e\xc9\x4d\xdb\x2a\xca" + "\xee\x71\xf9\xda\xad\xe8\x82\x0b\x4d\x0c\x1f\xb6\x1d\xef\x00\x67" + "\x74\x3d\x95\xe0\xb7\xc4\x30\x8a\x24\x87\x12\x47\x27\x70\x0d\x73" + /* g */ + "\x02", + .b_public = + "\x99\x4d\xd9\x01\x84\x8e\x4a\x5b\xb8\xa5\x64\x8c\x6c\x00\x5c\x0e" + "\x1e\x1b\xee\x5d\x9f\x53\xe3\x16\x70\x01\xed\xbf\x4f\x14\x36\x6e" + "\xe4\x43\x45\x43\x49\xcc\xb1\xb0\x2a\xc0\x6f\x22\x55\x42\x17\x94" + "\x18\x83\xd7\x2a\x5c\x51\x54\xf8\x4e\x7c\x10\xda\x76\x68\x57\x77" + "\x1e\x62\x03\x30\x04\x7b\x4c\x39\x9c\x54\x01\x54\xec\xef\xb3\x55" + "\xa4\xc0\x24\x6d\x3d\xbd\xcc\x46\x5b\x00\x96\xc7\xea\x93\xd1\x3f" + "\xf2\x6a\x72\xe3\xf2\xc1\x92\x24\x5b\xda\x48\x70\x2c\xa9\x59\x97" + "\x19\xb1\xd6\x54\xb3\x9c\x2e\xb0\x63\x07\x9b\x5e\xac\xb5\xf2\xb1" + "\x5b\xf8\xf3\xd7\x2d\x37\x9b\x68\x6c\xf8\x90\x07\xbc\x37\x9a\xa5" + "\xe2\x91\x12\x25\x47\x77\xe3\x3d\xb2\x95\x69\x44\x0b\x91\x1e\xaf" + "\x7c\x8c\x7c\x34\x41\x6a\xab\x60\x6e\xc6\x52\xec\x7e\x94\x0a\x37" + "\xec\x98\x90\xdf\x3f\x02\xbd\x23\x52\xdd\xd9\xe5\x31\x80\x74\x25" + "\xb6\xd2\xd3\xcc\xd5\xcc\x6d\xf9\x7e\x4d\x78\xab\x77\x51\xfa\x77" + "\x19\x94\x49\x8c\x05\xd4\x75\xed\xd2\xb3\x64\x57\xe0\x52\x99\xc0" + "\x83\xe3\xbb\x5e\x2b\xf1\xd2\xc0\xb1\x37\x36\x0b\x7c\xb5\x63\x96" + "\x8e\xde\x04\x23\x11\x95\x62\x11\x9a\xce\x6f\x63\xc8\xd5\xd1\x8f", + .expected_a_public = + "\x90\x89\xe4\x82\xd6\x0a\xcf\x1a\xae\xce\x1b\x66\xa7\x19\x71\x18" + "\x8f\x95\x4b\x5b\x80\x45\x4a\x5a\x43\x99\x4d\x37\xcf\xa3\xa7\x28" + "\x9c\xc7\x73\xf1\xb2\x17\xf6\x99\xe3\x6b\x56\xcb\x3e\x35\x60\x7d" + "\x65\xc7\x84\x6b\x3e\x60\xee\xcd\xd2\x70\xe7\xc9\x32\x1c\xf0\xb4" + "\xf9\x52\xd9\x88\x75\xfd\x40\x2c\xa7\xbe\x19\x1c\x0a\xae\x93\xe1" + "\x71\xc7\xcd\x4f\x33\x5c\x10\x7d\x39\x56\xfc\x73\x84\xb2\x67\xc3" + "\x77\x26\x20\x97\x2b\xf8\x13\x43\x93\x9c\x9a\xa4\x08\xc7\x34\x83" + "\xe6\x98\x61\xe7\x16\x30\x2c\xb1\xdb\x2a\xb2\xcc\xc3\x02\xa5\x3c" + "\x71\x50\x14\x83\xc7\xbb\xa4\xbe\x98\x1b\xfe\xcb\x43\xe9\x97\x62" + "\xd6\xf0\x8c\xcb\x1c\xba\x1e\xa8\xa6\xa6\x50\xfc\x85\x7d\x47\xbf" + "\xf4\x3e\x23\xd3\x5f\xb2\x71\x3e\x40\x94\xaa\x87\x83\x2c\x6c\x8e" + "\x60\xfd\xdd\xf7\xf4\x76\x03\xd3\x1d\xec\x18\x51\xa3\xf2\x44\x1a" + "\x3f\xb4\x7c\x18\x0d\x68\x65\x92\x54\x0d\x2d\x81\x16\xf1\x84\x66" + "\x89\x92\xd0\x1a\x5e\x1f\x42\x46\x5b\xe5\x83\x86\x80\xd9\xcd\x3a" + "\x5a\x2f\xb9\x59\x9b\xe4\x43\x84\x64\xf3\x09\x1a\x0a\xa2\x64\x0f" + "\x77\x4e\x8d\x8b\xe6\x88\xd1\xfc\xaf\x8f\xdf\x1d\xbc\x31\xb3\xbd", + .expected_ss = + "\x34\xc3\x35\x14\x88\x46\x26\x23\x97\xbb\xdd\x28\x5c\x94\xf6\x47" + "\xca\xb3\x19\xaf\xca\x44\x9b\xc2\x7d\x89\xfd\x96\x14\xfd\x6d\x58" + "\xd8\xc4\x6b\x61\x2a\x0d\xf2\x36\x45\xc8\xe4\xa4\xed\x81\x53\x81" + "\x66\x1e\xe0\x5a\xb1\x78\x2d\x0b\x5c\xb4\xd1\xfc\x90\xc6\x9c\xdb" + "\x5a\x30\x0b\x14\x7d\xbe\xb3\x7d\xb1\xb2\x76\x3c\x6c\xef\x74\x6b" + "\xe7\x1f\x64\x0c\xab\x65\xe1\x76\x5c\x3d\x83\xb5\x8a\xfb\xaf\x0f" + "\xf2\x06\x14\x8f\xa0\xf6\xc1\x89\x78\xf2\xba\x72\x73\x3c\xf7\x76" + "\x21\x67\xbc\x24\x31\xb8\x09\x65\x0f\x0c\x02\x32\x4a\x98\x14\xfc" + "\x72\x2c\x25\x60\x68\x5f\x2f\x30\x1e\x5b\xf0\x3b\xd1\xa2\x87\xa0" + "\x54\xdf\xdb\xc0\xee\x0a\x0f\x47\xc9\x90\x20\x2c\xf9\xe3\x52\xad" + "\x27\x65\x8d\x54\x8d\xa8\xa1\xf3\xed\x15\xd4\x94\x28\x90\x31\x93" + "\x1b\xc0\x51\xbb\x43\x5d\x76\x3b\x1d\x2a\x71\x50\xea\x5d\x48\x94" + "\x7f\x6f\xf1\x48\xdb\x30\xe5\xae\x64\x79\xd9\x7a\xdb\xc6\xff\xd8" + "\x5e\x5a\x64\xbd\xf6\x85\x04\xe8\x28\x6a\xac\xef\xce\x19\x8e\x9a" + "\xfe\x75\xc0\x27\x69\xe3\xb3\x7b\x21\xa7\xb1\x16\xa4\x85\x23\xee" + "\xb0\x1b\x04\x6e\xbd\xab\x16\xde\xfd\x86\x6b\xa9\x95\xd7\x0b\xfd", + .secret_size = 529, + .b_public_size = 256, + .expected_a_public_size = 256, + .expected_ss_size = 256, + } +}; + /* * MD4 test vectors from RFC1320 */ diff --git a/include/crypto/dh.h b/include/crypto/dh.h new file mode 100644 index 0000000000000..5102a8f282e60 --- /dev/null +++ b/include/crypto/dh.h @@ -0,0 +1,29 @@ +/* + * Diffie-Hellman secret to be used with kpp API along with helper functions + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_DH_ +#define _CRYPTO_DH_ + +struct dh { + void *key; + void *p; + void *g; + unsigned int key_size; + unsigned int p_size; + unsigned int g_size; +}; + +int crypto_dh_key_len(const struct dh *params); +int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params); +int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params); + +#endif diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 4fa897f3366bd..937ac122354a3 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -242,6 +242,7 @@ static inline void kpp_request_set_output(struct kpp_request *req, enum { CRYPTO_KPP_SECRET_TYPE_UNKNOWN, + CRYPTO_KPP_SECRET_TYPE_DH, }; /** From 3c4b23901a0c766879dff680cd6bdab47bcdbbd2 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Wed, 22 Jun 2016 17:49:15 +0100 Subject: [PATCH 079/180] crypto: ecdh - Add ECDH software support * Implement ECDH under kpp API * Provide ECC software support for curve P-192 and P-256. * Add kpp test for ECDH with data generated by OpenSSL Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/Kconfig | 5 + crypto/Makefile | 4 + crypto/ecc.c | 1018 +++++++++++++++++++++++++++++++++++++++ crypto/ecc.h | 83 ++++ crypto/ecc_curve_defs.h | 57 +++ crypto/ecdh.c | 151 ++++++ crypto/ecdh_helper.c | 86 ++++ crypto/testmgr.c | 10 + crypto/testmgr.h | 93 ++++ include/crypto/ecdh.h | 30 ++ include/crypto/kpp.h | 1 + 11 files changed, 1538 insertions(+) create mode 100644 crypto/ecc.c create mode 100644 crypto/ecc.h create mode 100644 crypto/ecc_curve_defs.h create mode 100644 crypto/ecdh.c create mode 100644 crypto/ecdh_helper.c create mode 100644 include/crypto/ecdh.h diff --git a/crypto/Kconfig b/crypto/Kconfig index 162d2f9aa2425..5baaa9d87574c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -118,6 +118,11 @@ config CRYPTO_DH help Generic implementation of the Diffie-Hellman algorithm. +config CRYPTO_ECDH + tristate "ECDH algorithm" + select CRYTPO_KPP + help + Generic implementation of the ECDH algorithm config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" diff --git a/crypto/Makefile b/crypto/Makefile index 82897208e8e03..df1bcfb090d21 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -35,6 +35,10 @@ obj-$(CONFIG_CRYPTO_KPP2) += kpp.o dh_generic-y := dh.o dh_generic-y += dh_helper.o obj-$(CONFIG_CRYPTO_DH) += dh_generic.o +ecdh_generic-y := ecc.o +ecdh_generic-y += ecdh.o +ecdh_generic-y += ecdh_helper.o +obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h diff --git a/crypto/ecc.c b/crypto/ecc.c new file mode 100644 index 0000000000000..9aedec6bbe728 --- /dev/null +++ b/crypto/ecc.c @@ -0,0 +1,1018 @@ +/* + * Copyright (c) 2013, Kenneth MacKay + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include "ecc.h" +#include "ecc_curve_defs.h" + +typedef struct { + u64 m_low; + u64 m_high; +} uint128_t; + +static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id) +{ + switch (curve_id) { + /* In FIPS mode only allow P256 and higher */ + case ECC_CURVE_NIST_P192: + return fips_enabled ? NULL : &nist_p192; + case ECC_CURVE_NIST_P256: + return &nist_p256; + default: + return NULL; + } +} + +static u64 *ecc_alloc_digits_space(unsigned int ndigits) +{ + size_t len = ndigits * sizeof(u64); + + if (!len) + return NULL; + + return kmalloc(len, GFP_KERNEL); +} + +static void ecc_free_digits_space(u64 *space) +{ + kzfree(space); +} + +static struct ecc_point *ecc_alloc_point(unsigned int ndigits) +{ + struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return NULL; + + p->x = ecc_alloc_digits_space(ndigits); + if (!p->x) + goto err_alloc_x; + + p->y = ecc_alloc_digits_space(ndigits); + if (!p->y) + goto err_alloc_y; + + p->ndigits = ndigits; + + return p; + +err_alloc_y: + ecc_free_digits_space(p->x); +err_alloc_x: + kfree(p); + return NULL; +} + +static void ecc_free_point(struct ecc_point *p) +{ + if (!p) + return; + + kzfree(p->x); + kzfree(p->y); + kzfree(p); +} + +static void vli_clear(u64 *vli, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + vli[i] = 0; +} + +/* Returns true if vli == 0, false otherwise. */ +static bool vli_is_zero(const u64 *vli, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) { + if (vli[i]) + return false; + } + + return true; +} + +/* Returns nonzero if bit bit of vli is set. */ +static u64 vli_test_bit(const u64 *vli, unsigned int bit) +{ + return (vli[bit / 64] & ((u64)1 << (bit % 64))); +} + +/* Counts the number of 64-bit "digits" in vli. */ +static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits) +{ + int i; + + /* Search from the end until we find a non-zero digit. + * We do it in reverse because we expect that most digits will + * be nonzero. + */ + for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--); + + return (i + 1); +} + +/* Counts the number of bits required for vli. */ +static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits) +{ + unsigned int i, num_digits; + u64 digit; + + num_digits = vli_num_digits(vli, ndigits); + if (num_digits == 0) + return 0; + + digit = vli[num_digits - 1]; + for (i = 0; digit; i++) + digit >>= 1; + + return ((num_digits - 1) * 64 + i); +} + +/* Sets dest = src. */ +static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + dest[i] = src[i]; +} + +/* Returns sign of left - right. */ +static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits) +{ + int i; + + for (i = ndigits - 1; i >= 0; i--) { + if (left[i] > right[i]) + return 1; + else if (left[i] < right[i]) + return -1; + } + + return 0; +} + +/* Computes result = in << c, returning carry. Can modify in place + * (if result == in). 0 < shift < 64. + */ +static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift, + unsigned int ndigits) +{ + u64 carry = 0; + int i; + + for (i = 0; i < ndigits; i++) { + u64 temp = in[i]; + + result[i] = (temp << shift) | carry; + carry = temp >> (64 - shift); + } + + return carry; +} + +/* Computes vli = vli >> 1. */ +static void vli_rshift1(u64 *vli, unsigned int ndigits) +{ + u64 *end = vli; + u64 carry = 0; + + vli += ndigits; + + while (vli-- > end) { + u64 temp = *vli; + *vli = (temp >> 1) | carry; + carry = temp << 63; + } +} + +/* Computes result = left + right, returning carry. Can modify in place. */ +static u64 vli_add(u64 *result, const u64 *left, const u64 *right, + unsigned int ndigits) +{ + u64 carry = 0; + int i; + + for (i = 0; i < ndigits; i++) { + u64 sum; + + sum = left[i] + right[i] + carry; + if (sum != left[i]) + carry = (sum < left[i]); + + result[i] = sum; + } + + return carry; +} + +/* Computes result = left - right, returning borrow. Can modify in place. */ +static u64 vli_sub(u64 *result, const u64 *left, const u64 *right, + unsigned int ndigits) +{ + u64 borrow = 0; + int i; + + for (i = 0; i < ndigits; i++) { + u64 diff; + + diff = left[i] - right[i] - borrow; + if (diff != left[i]) + borrow = (diff > left[i]); + + result[i] = diff; + } + + return borrow; +} + +static uint128_t mul_64_64(u64 left, u64 right) +{ + u64 a0 = left & 0xffffffffull; + u64 a1 = left >> 32; + u64 b0 = right & 0xffffffffull; + u64 b1 = right >> 32; + u64 m0 = a0 * b0; + u64 m1 = a0 * b1; + u64 m2 = a1 * b0; + u64 m3 = a1 * b1; + uint128_t result; + + m2 += (m0 >> 32); + m2 += m1; + + /* Overflow */ + if (m2 < m1) + m3 += 0x100000000ull; + + result.m_low = (m0 & 0xffffffffull) | (m2 << 32); + result.m_high = m3 + (m2 >> 32); + + return result; +} + +static uint128_t add_128_128(uint128_t a, uint128_t b) +{ + uint128_t result; + + result.m_low = a.m_low + b.m_low; + result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low); + + return result; +} + +static void vli_mult(u64 *result, const u64 *left, const u64 *right, + unsigned int ndigits) +{ + uint128_t r01 = { 0, 0 }; + u64 r2 = 0; + unsigned int i, k; + + /* Compute each digit of result in sequence, maintaining the + * carries. + */ + for (k = 0; k < ndigits * 2 - 1; k++) { + unsigned int min; + + if (k < ndigits) + min = 0; + else + min = (k + 1) - ndigits; + + for (i = min; i <= k && i < ndigits; i++) { + uint128_t product; + + product = mul_64_64(left[i], right[k - i]); + + r01 = add_128_128(r01, product); + r2 += (r01.m_high < product.m_high); + } + + result[k] = r01.m_low; + r01.m_low = r01.m_high; + r01.m_high = r2; + r2 = 0; + } + + result[ndigits * 2 - 1] = r01.m_low; +} + +static void vli_square(u64 *result, const u64 *left, unsigned int ndigits) +{ + uint128_t r01 = { 0, 0 }; + u64 r2 = 0; + int i, k; + + for (k = 0; k < ndigits * 2 - 1; k++) { + unsigned int min; + + if (k < ndigits) + min = 0; + else + min = (k + 1) - ndigits; + + for (i = min; i <= k && i <= k - i; i++) { + uint128_t product; + + product = mul_64_64(left[i], left[k - i]); + + if (i < k - i) { + r2 += product.m_high >> 63; + product.m_high = (product.m_high << 1) | + (product.m_low >> 63); + product.m_low <<= 1; + } + + r01 = add_128_128(r01, product); + r2 += (r01.m_high < product.m_high); + } + + result[k] = r01.m_low; + r01.m_low = r01.m_high; + r01.m_high = r2; + r2 = 0; + } + + result[ndigits * 2 - 1] = r01.m_low; +} + +/* Computes result = (left + right) % mod. + * Assumes that left < mod and right < mod, result != mod. + */ +static void vli_mod_add(u64 *result, const u64 *left, const u64 *right, + const u64 *mod, unsigned int ndigits) +{ + u64 carry; + + carry = vli_add(result, left, right, ndigits); + + /* result > mod (result = mod + remainder), so subtract mod to + * get remainder. + */ + if (carry || vli_cmp(result, mod, ndigits) >= 0) + vli_sub(result, result, mod, ndigits); +} + +/* Computes result = (left - right) % mod. + * Assumes that left < mod and right < mod, result != mod. + */ +static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right, + const u64 *mod, unsigned int ndigits) +{ + u64 borrow = vli_sub(result, left, right, ndigits); + + /* In this case, p_result == -diff == (max int) - diff. + * Since -x % d == d - x, we can get the correct result from + * result + mod (with overflow). + */ + if (borrow) + vli_add(result, result, mod, ndigits); +} + +/* Computes p_result = p_product % curve_p. + * See algorithm 5 and 6 from + * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf + */ +static void vli_mmod_fast_192(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + const unsigned int ndigits = 3; + int carry; + + vli_set(result, product, ndigits); + + vli_set(tmp, &product[3], ndigits); + carry = vli_add(result, result, tmp, ndigits); + + tmp[0] = 0; + tmp[1] = product[3]; + tmp[2] = product[4]; + carry += vli_add(result, result, tmp, ndigits); + + tmp[0] = tmp[1] = product[5]; + tmp[2] = 0; + carry += vli_add(result, result, tmp, ndigits); + + while (carry || vli_cmp(curve_prime, result, ndigits) != 1) + carry -= vli_sub(result, result, curve_prime, ndigits); +} + +/* Computes result = product % curve_prime + * from http://www.nsa.gov/ia/_files/nist-routines.pdf + */ +static void vli_mmod_fast_256(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + int carry; + const unsigned int ndigits = 4; + + /* t */ + vli_set(result, product, ndigits); + + /* s1 */ + tmp[0] = 0; + tmp[1] = product[5] & 0xffffffff00000000ull; + tmp[2] = product[6]; + tmp[3] = product[7]; + carry = vli_lshift(tmp, tmp, 1, ndigits); + carry += vli_add(result, result, tmp, ndigits); + + /* s2 */ + tmp[1] = product[6] << 32; + tmp[2] = (product[6] >> 32) | (product[7] << 32); + tmp[3] = product[7] >> 32; + carry += vli_lshift(tmp, tmp, 1, ndigits); + carry += vli_add(result, result, tmp, ndigits); + + /* s3 */ + tmp[0] = product[4]; + tmp[1] = product[5] & 0xffffffff; + tmp[2] = 0; + tmp[3] = product[7]; + carry += vli_add(result, result, tmp, ndigits); + + /* s4 */ + tmp[0] = (product[4] >> 32) | (product[5] << 32); + tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull); + tmp[2] = product[7]; + tmp[3] = (product[6] >> 32) | (product[4] << 32); + carry += vli_add(result, result, tmp, ndigits); + + /* d1 */ + tmp[0] = (product[5] >> 32) | (product[6] << 32); + tmp[1] = (product[6] >> 32); + tmp[2] = 0; + tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32); + carry -= vli_sub(result, result, tmp, ndigits); + + /* d2 */ + tmp[0] = product[6]; + tmp[1] = product[7]; + tmp[2] = 0; + tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull); + carry -= vli_sub(result, result, tmp, ndigits); + + /* d3 */ + tmp[0] = (product[6] >> 32) | (product[7] << 32); + tmp[1] = (product[7] >> 32) | (product[4] << 32); + tmp[2] = (product[4] >> 32) | (product[5] << 32); + tmp[3] = (product[6] << 32); + carry -= vli_sub(result, result, tmp, ndigits); + + /* d4 */ + tmp[0] = product[7]; + tmp[1] = product[4] & 0xffffffff00000000ull; + tmp[2] = product[5]; + tmp[3] = product[6] & 0xffffffff00000000ull; + carry -= vli_sub(result, result, tmp, ndigits); + + if (carry < 0) { + do { + carry += vli_add(result, result, curve_prime, ndigits); + } while (carry < 0); + } else { + while (carry || vli_cmp(curve_prime, result, ndigits) != 1) + carry -= vli_sub(result, result, curve_prime, ndigits); + } +} + +/* Computes result = product % curve_prime + * from http://www.nsa.gov/ia/_files/nist-routines.pdf +*/ +static bool vli_mmod_fast(u64 *result, u64 *product, + const u64 *curve_prime, unsigned int ndigits) +{ + u64 tmp[2 * ndigits]; + + switch (ndigits) { + case 3: + vli_mmod_fast_192(result, product, curve_prime, tmp); + break; + case 4: + vli_mmod_fast_256(result, product, curve_prime, tmp); + break; + default: + pr_err("unsupports digits size!\n"); + return false; + } + + return true; +} + +/* Computes result = (left * right) % curve_prime. */ +static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, + const u64 *curve_prime, unsigned int ndigits) +{ + u64 product[2 * ndigits]; + + vli_mult(product, left, right, ndigits); + vli_mmod_fast(result, product, curve_prime, ndigits); +} + +/* Computes result = left^2 % curve_prime. */ +static void vli_mod_square_fast(u64 *result, const u64 *left, + const u64 *curve_prime, unsigned int ndigits) +{ + u64 product[2 * ndigits]; + + vli_square(product, left, ndigits); + vli_mmod_fast(result, product, curve_prime, ndigits); +} + +#define EVEN(vli) (!(vli[0] & 1)) +/* Computes result = (1 / p_input) % mod. All VLIs are the same size. + * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide" + * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf + */ +static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod, + unsigned int ndigits) +{ + u64 a[ndigits], b[ndigits]; + u64 u[ndigits], v[ndigits]; + u64 carry; + int cmp_result; + + if (vli_is_zero(input, ndigits)) { + vli_clear(result, ndigits); + return; + } + + vli_set(a, input, ndigits); + vli_set(b, mod, ndigits); + vli_clear(u, ndigits); + u[0] = 1; + vli_clear(v, ndigits); + + while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) { + carry = 0; + + if (EVEN(a)) { + vli_rshift1(a, ndigits); + + if (!EVEN(u)) + carry = vli_add(u, u, mod, ndigits); + + vli_rshift1(u, ndigits); + if (carry) + u[ndigits - 1] |= 0x8000000000000000ull; + } else if (EVEN(b)) { + vli_rshift1(b, ndigits); + + if (!EVEN(v)) + carry = vli_add(v, v, mod, ndigits); + + vli_rshift1(v, ndigits); + if (carry) + v[ndigits - 1] |= 0x8000000000000000ull; + } else if (cmp_result > 0) { + vli_sub(a, a, b, ndigits); + vli_rshift1(a, ndigits); + + if (vli_cmp(u, v, ndigits) < 0) + vli_add(u, u, mod, ndigits); + + vli_sub(u, u, v, ndigits); + if (!EVEN(u)) + carry = vli_add(u, u, mod, ndigits); + + vli_rshift1(u, ndigits); + if (carry) + u[ndigits - 1] |= 0x8000000000000000ull; + } else { + vli_sub(b, b, a, ndigits); + vli_rshift1(b, ndigits); + + if (vli_cmp(v, u, ndigits) < 0) + vli_add(v, v, mod, ndigits); + + vli_sub(v, v, u, ndigits); + if (!EVEN(v)) + carry = vli_add(v, v, mod, ndigits); + + vli_rshift1(v, ndigits); + if (carry) + v[ndigits - 1] |= 0x8000000000000000ull; + } + } + + vli_set(result, u, ndigits); +} + +/* ------ Point operations ------ */ + +/* Returns true if p_point is the point at infinity, false otherwise. */ +static bool ecc_point_is_zero(const struct ecc_point *point) +{ + return (vli_is_zero(point->x, point->ndigits) && + vli_is_zero(point->y, point->ndigits)); +} + +/* Point multiplication algorithm using Montgomery's ladder with co-Z + * coordinates. From http://eprint.iacr.org/2011/338.pdf + */ + +/* Double in place */ +static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, + u64 *curve_prime, unsigned int ndigits) +{ + /* t1 = x, t2 = y, t3 = z */ + u64 t4[ndigits]; + u64 t5[ndigits]; + + if (vli_is_zero(z1, ndigits)) + return; + + /* t4 = y1^2 */ + vli_mod_square_fast(t4, y1, curve_prime, ndigits); + /* t5 = x1*y1^2 = A */ + vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits); + /* t4 = y1^4 */ + vli_mod_square_fast(t4, t4, curve_prime, ndigits); + /* t2 = y1*z1 = z3 */ + vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits); + /* t3 = z1^2 */ + vli_mod_square_fast(z1, z1, curve_prime, ndigits); + + /* t1 = x1 + z1^2 */ + vli_mod_add(x1, x1, z1, curve_prime, ndigits); + /* t3 = 2*z1^2 */ + vli_mod_add(z1, z1, z1, curve_prime, ndigits); + /* t3 = x1 - z1^2 */ + vli_mod_sub(z1, x1, z1, curve_prime, ndigits); + /* t1 = x1^2 - z1^4 */ + vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits); + + /* t3 = 2*(x1^2 - z1^4) */ + vli_mod_add(z1, x1, x1, curve_prime, ndigits); + /* t1 = 3*(x1^2 - z1^4) */ + vli_mod_add(x1, x1, z1, curve_prime, ndigits); + if (vli_test_bit(x1, 0)) { + u64 carry = vli_add(x1, x1, curve_prime, ndigits); + + vli_rshift1(x1, ndigits); + x1[ndigits - 1] |= carry << 63; + } else { + vli_rshift1(x1, ndigits); + } + /* t1 = 3/2*(x1^2 - z1^4) = B */ + + /* t3 = B^2 */ + vli_mod_square_fast(z1, x1, curve_prime, ndigits); + /* t3 = B^2 - A */ + vli_mod_sub(z1, z1, t5, curve_prime, ndigits); + /* t3 = B^2 - 2A = x3 */ + vli_mod_sub(z1, z1, t5, curve_prime, ndigits); + /* t5 = A - x3 */ + vli_mod_sub(t5, t5, z1, curve_prime, ndigits); + /* t1 = B * (A - x3) */ + vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + /* t4 = B * (A - x3) - y1^4 = y3 */ + vli_mod_sub(t4, x1, t4, curve_prime, ndigits); + + vli_set(x1, z1, ndigits); + vli_set(z1, y1, ndigits); + vli_set(y1, t4, ndigits); +} + +/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ +static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime, + unsigned int ndigits) +{ + u64 t1[ndigits]; + + vli_mod_square_fast(t1, z, curve_prime, ndigits); /* z^2 */ + vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */ + vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits); /* z^3 */ + vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */ +} + +/* P = (x1, y1) => 2P, (x2, y2) => P' */ +static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, + u64 *p_initial_z, u64 *curve_prime, + unsigned int ndigits) +{ + u64 z[ndigits]; + + vli_set(x2, x1, ndigits); + vli_set(y2, y1, ndigits); + + vli_clear(z, ndigits); + z[0] = 1; + + if (p_initial_z) + vli_set(z, p_initial_z, ndigits); + + apply_z(x1, y1, z, curve_prime, ndigits); + + ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits); + + apply_z(x2, y2, z, curve_prime, ndigits); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) + * or P => P', Q => P + Q + */ +static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, + unsigned int ndigits) +{ + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + u64 t5[ndigits]; + + /* t5 = x2 - x1 */ + vli_mod_sub(t5, x2, x1, curve_prime, ndigits); + /* t5 = (x2 - x1)^2 = A */ + vli_mod_square_fast(t5, t5, curve_prime, ndigits); + /* t1 = x1*A = B */ + vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + /* t3 = x2*A = C */ + vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); + /* t4 = y2 - y1 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + /* t5 = (y2 - y1)^2 = D */ + vli_mod_square_fast(t5, y2, curve_prime, ndigits); + + /* t5 = D - B */ + vli_mod_sub(t5, t5, x1, curve_prime, ndigits); + /* t5 = D - B - C = x3 */ + vli_mod_sub(t5, t5, x2, curve_prime, ndigits); + /* t3 = C - B */ + vli_mod_sub(x2, x2, x1, curve_prime, ndigits); + /* t2 = y1*(C - B) */ + vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits); + /* t3 = B - x3 */ + vli_mod_sub(x2, x1, t5, curve_prime, ndigits); + /* t4 = (y2 - y1)*(B - x3) */ + vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits); + /* t4 = y3 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + + vli_set(x2, t5, ndigits); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) + * or P => P - Q, Q => P + Q + */ +static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, + unsigned int ndigits) +{ + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + u64 t5[ndigits]; + u64 t6[ndigits]; + u64 t7[ndigits]; + + /* t5 = x2 - x1 */ + vli_mod_sub(t5, x2, x1, curve_prime, ndigits); + /* t5 = (x2 - x1)^2 = A */ + vli_mod_square_fast(t5, t5, curve_prime, ndigits); + /* t1 = x1*A = B */ + vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + /* t3 = x2*A = C */ + vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); + /* t4 = y2 + y1 */ + vli_mod_add(t5, y2, y1, curve_prime, ndigits); + /* t4 = y2 - y1 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + + /* t6 = C - B */ + vli_mod_sub(t6, x2, x1, curve_prime, ndigits); + /* t2 = y1 * (C - B) */ + vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits); + /* t6 = B + C */ + vli_mod_add(t6, x1, x2, curve_prime, ndigits); + /* t3 = (y2 - y1)^2 */ + vli_mod_square_fast(x2, y2, curve_prime, ndigits); + /* t3 = x3 */ + vli_mod_sub(x2, x2, t6, curve_prime, ndigits); + + /* t7 = B - x3 */ + vli_mod_sub(t7, x1, x2, curve_prime, ndigits); + /* t4 = (y2 - y1)*(B - x3) */ + vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits); + /* t4 = y3 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + + /* t7 = (y2 + y1)^2 = F */ + vli_mod_square_fast(t7, t5, curve_prime, ndigits); + /* t7 = x3' */ + vli_mod_sub(t7, t7, t6, curve_prime, ndigits); + /* t6 = x3' - B */ + vli_mod_sub(t6, t7, x1, curve_prime, ndigits); + /* t6 = (y2 + y1)*(x3' - B) */ + vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits); + /* t2 = y3' */ + vli_mod_sub(y1, t6, y1, curve_prime, ndigits); + + vli_set(x1, t7, ndigits); +} + +static void ecc_point_mult(struct ecc_point *result, + const struct ecc_point *point, const u64 *scalar, + u64 *initial_z, u64 *curve_prime, + unsigned int ndigits) +{ + /* R0 and R1 */ + u64 rx[2][ndigits]; + u64 ry[2][ndigits]; + u64 z[ndigits]; + int i, nb; + int num_bits = vli_num_bits(scalar, ndigits); + + vli_set(rx[1], point->x, ndigits); + vli_set(ry[1], point->y, ndigits); + + xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve_prime, + ndigits); + + for (i = num_bits - 2; i > 0; i--) { + nb = !vli_test_bit(scalar, i); + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, + ndigits); + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, + ndigits); + } + + nb = !vli_test_bit(scalar, 0); + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, + ndigits); + + /* Find final 1/Z value. */ + /* X1 - X0 */ + vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits); + /* Yb * (X1 - X0) */ + vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits); + /* xP * Yb * (X1 - X0) */ + vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits); + + /* 1 / (xP * Yb * (X1 - X0)) */ + vli_mod_inv(z, z, curve_prime, point->ndigits); + + /* yP / (xP * Yb * (X1 - X0)) */ + vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits); + /* Xb * yP / (xP * Yb * (X1 - X0)) */ + vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits); + /* End 1/Z calculation */ + + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, ndigits); + + apply_z(rx[0], ry[0], z, curve_prime, ndigits); + + vli_set(result->x, rx[0], ndigits); + vli_set(result->y, ry[0], ndigits); +} + +static inline void ecc_swap_digits(const u64 *in, u64 *out, + unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + out[i] = __swab64(in[ndigits - 1 - i]); +} + +int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len) +{ + int nbytes; + const struct ecc_curve *curve = ecc_get_curve(curve_id); + + if (!private_key) + return -EINVAL; + + nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + if (private_key_len != nbytes) + return -EINVAL; + + if (vli_is_zero((const u64 *)&private_key[0], ndigits)) + return -EINVAL; + + /* Make sure the private key is in the range [1, n-1]. */ + if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1) + return -EINVAL; + + return 0; +} + +int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + u8 *public_key, unsigned int public_key_len) +{ + int ret = 0; + struct ecc_point *pk; + u64 priv[ndigits]; + unsigned int nbytes; + const struct ecc_curve *curve = ecc_get_curve(curve_id); + + if (!private_key || !curve) { + ret = -EINVAL; + goto out; + } + + ecc_swap_digits((const u64 *)private_key, priv, ndigits); + + pk = ecc_alloc_point(ndigits); + if (!pk) { + ret = -ENOMEM; + goto out; + } + + ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits); + if (ecc_point_is_zero(pk)) { + ret = -EAGAIN; + goto err_free_point; + } + + nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + ecc_swap_digits(pk->x, (u64 *)public_key, ndigits); + ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits); + +err_free_point: + ecc_free_point(pk); +out: + return ret; +} + +int ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + const u8 *public_key, unsigned int public_key_len, + u8 *secret, unsigned int secret_len) +{ + int ret = 0; + struct ecc_point *product, *pk; + u64 priv[ndigits]; + u64 rand_z[ndigits]; + unsigned int nbytes; + const struct ecc_curve *curve = ecc_get_curve(curve_id); + + if (!private_key || !public_key || !curve) { + ret = -EINVAL; + goto out; + } + + nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + get_random_bytes(rand_z, nbytes); + + pk = ecc_alloc_point(ndigits); + if (!pk) { + ret = -ENOMEM; + goto out; + } + + product = ecc_alloc_point(ndigits); + if (!product) { + ret = -ENOMEM; + goto err_alloc_product; + } + + ecc_swap_digits((const u64 *)public_key, pk->x, ndigits); + ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits); + ecc_swap_digits((const u64 *)private_key, priv, ndigits); + + ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits); + + ecc_swap_digits(product->x, (u64 *)secret, ndigits); + + if (ecc_point_is_zero(product)) + ret = -EFAULT; + + ecc_free_point(product); +err_alloc_product: + ecc_free_point(pk); +out: + return ret; +} diff --git a/crypto/ecc.h b/crypto/ecc.h new file mode 100644 index 0000000000000..b5db4b989f3c5 --- /dev/null +++ b/crypto/ecc.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013, Kenneth MacKay + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _CRYPTO_ECC_H +#define _CRYPTO_ECC_H + +#define ECC_MAX_DIGITS 4 /* 256 */ + +#define ECC_DIGITS_TO_BYTES_SHIFT 3 + +/** + * ecc_is_key_valid() - Validate a given ECDH private key + * + * @curve_id: id representing the curve to use + * @ndigits: curve number of digits + * @private_key: private key to be used for the given curve + * @private_key_len: private key len + * + * Returns 0 if the key is acceptable, a negative value otherwise + */ +int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len); + +/** + * ecdh_make_pub_key() - Compute an ECC public key + * + * @curve_id: id representing the curve to use + * @private_key: pregenerated private key for the given curve + * @private_key_len: length of private_key + * @public_key: buffer for storing the public key generated + * @public_key_len: length of the public_key buffer + * + * Returns 0 if the public key was generated successfully, a negative value + * if an error occurred. + */ +int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + u8 *public_key, unsigned int public_key_len); + +/** + * ecdh_shared_secret() - Compute a shared secret + * + * @curve_id: id representing the curve to use + * @private_key: private key of part A + * @private_key_len: length of private_key + * @public_key: public key of counterpart B + * @public_key_len: length of public_key + * @secret: buffer for storing the calculated shared secret + * @secret_len: length of the secret buffer + * + * Note: It is recommended that you hash the result of ecdh_shared_secret + * before using it for symmetric encryption or HMAC. + * + * Returns 0 if the shared secret was generated successfully, a negative value + * if an error occurred. + */ +int ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + const u8 *public_key, unsigned int public_key_len, + u8 *secret, unsigned int secret_len); +#endif diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h new file mode 100644 index 0000000000000..03ae5f7140284 --- /dev/null +++ b/crypto/ecc_curve_defs.h @@ -0,0 +1,57 @@ +#ifndef _CRYTO_ECC_CURVE_DEFS_H +#define _CRYTO_ECC_CURVE_DEFS_H + +struct ecc_point { + u64 *x; + u64 *y; + u8 ndigits; +}; + +struct ecc_curve { + char *name; + struct ecc_point g; + u64 *p; + u64 *n; +}; + +/* NIST P-192 */ +static u64 nist_p192_g_x[] = { 0xF4FF0AFD82FF1012ull, 0x7CBF20EB43A18800ull, + 0x188DA80EB03090F6ull }; +static u64 nist_p192_g_y[] = { 0x73F977A11E794811ull, 0x631011ED6B24CDD5ull, + 0x07192B95FFC8DA78ull }; +static u64 nist_p192_p[] = { 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFEull, + 0xFFFFFFFFFFFFFFFFull }; +static u64 nist_p192_n[] = { 0x146BC9B1B4D22831ull, 0xFFFFFFFF99DEF836ull, + 0xFFFFFFFFFFFFFFFFull }; +static struct ecc_curve nist_p192 = { + .name = "nist_192", + .g = { + .x = nist_p192_g_x, + .y = nist_p192_g_y, + .ndigits = 3, + }, + .p = nist_p192_p, + .n = nist_p192_n +}; + +/* NIST P-256 */ +static u64 nist_p256_g_x[] = { 0xF4A13945D898C296ull, 0x77037D812DEB33A0ull, + 0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull }; +static u64 nist_p256_g_y[] = { 0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull, + 0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull }; +static u64 nist_p256_p[] = { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull, + 0x0000000000000000ull, 0xFFFFFFFF00000001ull }; +static u64 nist_p256_n[] = { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull }; +static struct ecc_curve nist_p256 = { + .name = "nist_256", + .g = { + .x = nist_p256_g_x, + .y = nist_p256_g_y, + .ndigits = 4, + }, + .p = nist_p256_p, + .n = nist_p256_n +}; + +#endif diff --git a/crypto/ecdh.c b/crypto/ecdh.c new file mode 100644 index 0000000000000..d3a9eeca4b326 --- /dev/null +++ b/crypto/ecdh.c @@ -0,0 +1,151 @@ +/* ECDH key-agreement protocol + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvator Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "ecc.h" + +struct ecdh_ctx { + unsigned int curve_id; + unsigned int ndigits; + u64 private_key[ECC_MAX_DIGITS]; + u64 public_key[2 * ECC_MAX_DIGITS]; + u64 shared_secret[ECC_MAX_DIGITS]; +}; + +static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static unsigned int ecdh_supported_curve(unsigned int curve_id) +{ + switch (curve_id) { + case ECC_CURVE_NIST_P192: return 3; + case ECC_CURVE_NIST_P256: return 4; + default: return 0; + } +} + +static int ecdh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) +{ + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + struct ecdh params; + unsigned int ndigits; + + if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + ndigits = ecdh_supported_curve(params.curve_id); + if (!ndigits) + return -EINVAL; + + ctx->curve_id = params.curve_id; + ctx->ndigits = ndigits; + + if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, + (const u8 *)params.key, params.key_size) < 0) + return -EINVAL; + + memcpy(ctx->private_key, params.key, params.key_size); + + return 0; +} + +static int ecdh_compute_value(struct kpp_request *req) +{ + int ret = 0; + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + size_t copied, nbytes; + void *buf; + + nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + if (req->src) { + copied = sg_copy_to_buffer(req->src, 1, ctx->public_key, + 2 * nbytes); + if (copied != 2 * nbytes) + return -EINVAL; + + ret = ecdh_shared_secret(ctx->curve_id, ctx->ndigits, + (const u8 *)ctx->private_key, nbytes, + (const u8 *)ctx->public_key, 2 * nbytes, + (u8 *)ctx->shared_secret, nbytes); + + buf = ctx->shared_secret; + } else { + ret = ecdh_make_pub_key(ctx->curve_id, ctx->ndigits, + (const u8 *)ctx->private_key, nbytes, + (u8 *)ctx->public_key, + sizeof(ctx->public_key)); + buf = ctx->public_key; + /* Public part is a point thus it has both coordinates */ + nbytes *= 2; + } + + if (ret < 0) + return ret; + + copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes); + if (copied != nbytes) + return -EINVAL; + + return ret; +} + +static int ecdh_max_size(struct crypto_kpp *tfm) +{ + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + /* Public key is made of two coordinates */ + return 2 * nbytes; +} + +static void no_exit_tfm(struct crypto_kpp *tfm) +{ + return; +} + +static struct kpp_alg ecdh = { + .set_secret = ecdh_set_secret, + .generate_public_key = ecdh_compute_value, + .compute_shared_secret = ecdh_compute_value, + .max_size = ecdh_max_size, + .exit = no_exit_tfm, + .base = { + .cra_name = "ecdh", + .cra_driver_name = "ecdh-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct ecdh_ctx), + }, +}; + +static int ecdh_init(void) +{ + return crypto_register_kpp(&ecdh); +} + +static void ecdh_exit(void) +{ + crypto_unregister_kpp(&ecdh); +} + +module_init(ecdh_init); +module_exit(ecdh_exit); +MODULE_ALIAS_CRYPTO("ecdh"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ECDH generic algorithm"); diff --git a/crypto/ecdh_helper.c b/crypto/ecdh_helper.c new file mode 100644 index 0000000000000..3cd8a2414e60e --- /dev/null +++ b/crypto/ecdh_helper.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +#define ECDH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 2 * sizeof(short)) + +static inline u8 *ecdh_pack_data(void *dst, const void *src, size_t sz) +{ + memcpy(dst, src, sz); + return dst + sz; +} + +static inline const u8 *ecdh_unpack_data(void *dst, const void *src, size_t sz) +{ + memcpy(dst, src, sz); + return src + sz; +} + +int crypto_ecdh_key_len(const struct ecdh *params) +{ + return ECDH_KPP_SECRET_MIN_SIZE + params->key_size; +} +EXPORT_SYMBOL_GPL(crypto_ecdh_key_len); + +int crypto_ecdh_encode_key(char *buf, unsigned int len, + const struct ecdh *params) +{ + u8 *ptr = buf; + struct kpp_secret secret = { + .type = CRYPTO_KPP_SECRET_TYPE_ECDH, + .len = len + }; + + if (unlikely(!buf)) + return -EINVAL; + + if (len != crypto_ecdh_key_len(params)) + return -EINVAL; + + ptr = ecdh_pack_data(ptr, &secret, sizeof(secret)); + ptr = ecdh_pack_data(ptr, ¶ms->curve_id, sizeof(params->curve_id)); + ptr = ecdh_pack_data(ptr, ¶ms->key_size, sizeof(params->key_size)); + ecdh_pack_data(ptr, params->key, params->key_size); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_ecdh_encode_key); + +int crypto_ecdh_decode_key(const char *buf, unsigned int len, + struct ecdh *params) +{ + const u8 *ptr = buf; + struct kpp_secret secret; + + if (unlikely(!buf || len < ECDH_KPP_SECRET_MIN_SIZE)) + return -EINVAL; + + ptr = ecdh_unpack_data(&secret, ptr, sizeof(secret)); + if (secret.type != CRYPTO_KPP_SECRET_TYPE_ECDH) + return -EINVAL; + + ptr = ecdh_unpack_data(¶ms->curve_id, ptr, sizeof(params->curve_id)); + ptr = ecdh_unpack_data(¶ms->key_size, ptr, sizeof(params->key_size)); + if (secret.len != crypto_ecdh_key_len(params)) + return -EINVAL; + + /* Don't allocate memory. Set pointer to data + * within the given buffer + */ + params->key = (void *)ptr; + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_ecdh_decode_key); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ff79eb887fd0b..537fdc380a7b8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3300,6 +3300,16 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "ecdh", + .test = alg_test_kpp, + .fips_allowed = 1, + .suite = { + .kpp = { + .vecs = ecdh_tv_template, + .count = ECDH_TEST_VECTORS + } + } }, { .alg = "gcm(aes)", .test = alg_test_aead, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 78e874eca031d..7358931b3082c 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -560,6 +560,99 @@ struct kpp_testvec dh_tv_template[] = { } }; +#ifdef CONFIG_CRYPTO_FIPS +#define ECDH_TEST_VECTORS 1 +#else +#define ECDH_TEST_VECTORS 2 +#endif +struct kpp_testvec ecdh_tv_template[] = { + { +#ifndef CONFIG_CRYPTO_FIPS + .secret = +#ifdef __LITTLE_ENDIAN + "\x02\x00" /* type */ + "\x20\x00" /* len */ + "\x01\x00" /* curve_id */ + "\x18\x00" /* key_size */ +#else + "\x00\x02" /* type */ + "\x00\x20" /* len */ + "\x00\x01" /* curve_id */ + "\x00\x18" /* key_size */ +#endif + "\xb5\x05\xb1\x71\x1e\xbf\x8c\xda" + "\x4e\x19\x1e\x62\x1f\x23\x23\x31" + "\x36\x1e\xd3\x84\x2f\xcc\x21\x72", + .b_public = + "\xc3\xba\x67\x4b\x71\xec\xd0\x76" + "\x7a\x99\x75\x64\x36\x13\x9a\x94" + "\x5d\x8b\xdc\x60\x90\x91\xfd\x3f" + "\xb0\x1f\x8a\x0a\x68\xc6\x88\x6e" + "\x83\x87\xdd\x67\x09\xf8\x8d\x96" + "\x07\xd6\xbd\x1c\xe6\x8d\x9d\x67", + .expected_a_public = + "\x1a\x04\xdb\xa5\xe1\xdd\x4e\x79" + "\xa3\xe6\xef\x0e\x5c\x80\x49\x85" + "\xfa\x78\xb4\xef\x49\xbd\x4c\x7c" + "\x22\x90\x21\x02\xf9\x1b\x81\x5d" + "\x0c\x8a\xa8\x98\xd6\x27\x69\x88" + "\x5e\xbc\x94\xd8\x15\x9e\x21\xce", + .expected_ss = + "\xf4\x57\xcc\x4f\x1f\x4e\x31\xcc" + "\xe3\x40\x60\xc8\x06\x93\xc6\x2e" + "\x99\x80\x81\x28\xaf\xc5\x51\x74", + .secret_size = 32, + .b_public_size = 48, + .expected_a_public_size = 48, + .expected_ss_size = 24 + }, { +#endif + .secret = +#ifdef __LITTLE_ENDIAN + "\x02\x00" /* type */ + "\x28\x00" /* len */ + "\x02\x00" /* curve_id */ + "\x20\x00" /* key_size */ +#else + "\x00\x02" /* type */ + "\x00\x28" /* len */ + "\x00\x02" /* curve_id */ + "\x00\x20" /* key_size */ +#endif + "\x24\xd1\x21\xeb\xe5\xcf\x2d\x83" + "\xf6\x62\x1b\x6e\x43\x84\x3a\xa3" + "\x8b\xe0\x86\xc3\x20\x19\xda\x92" + "\x50\x53\x03\xe1\xc0\xea\xb8\x82", + .expected_a_public = + "\x1a\x7f\xeb\x52\x00\xbd\x3c\x31" + "\x7d\xb6\x70\xc1\x86\xa6\xc7\xc4" + "\x3b\xc5\x5f\x6c\x6f\x58\x3c\xf5" + "\xb6\x63\x82\x77\x33\x24\xa1\x5f" + "\x6a\xca\x43\x6f\xf7\x7e\xff\x02" + "\x37\x08\xcc\x40\x5e\x7a\xfd\x6a" + "\x6a\x02\x6e\x41\x87\x68\x38\x77" + "\xfa\xa9\x44\x43\x2d\xef\x09\xdf", + .expected_ss = + "\xea\x17\x6f\x7e\x6e\x57\x26\x38" + "\x8b\xfb\x41\xeb\xba\xc8\x6d\xa5" + "\xa8\x72\xd1\xff\xc9\x47\x3d\xaa" + "\x58\x43\x9f\x34\x0f\x8c\xf3\xc9", + .b_public = + "\xcc\xb4\xda\x74\xb1\x47\x3f\xea" + "\x6c\x70\x9e\x38\x2d\xc7\xaa\xb7" + "\x29\xb2\x47\x03\x19\xab\xdd\x34" + "\xbd\xa8\x2c\x93\xe1\xa4\x74\xd9" + "\x64\x63\xf7\x70\x20\x2f\xa4\xe6" + "\x9f\x4a\x38\xcc\xc0\x2c\x49\x2f" + "\xb1\x32\xbb\xaf\x22\x61\xda\xcb" + "\x6f\xdb\xa9\xaa\xfc\x77\x81\xf3", + .secret_size = 40, + .b_public_size = 64, + .expected_a_public_size = 64, + .expected_ss_size = 32 + } +}; + /* * MD4 test vectors from RFC1320 */ diff --git a/include/crypto/ecdh.h b/include/crypto/ecdh.h new file mode 100644 index 0000000000000..84bad548d1944 --- /dev/null +++ b/include/crypto/ecdh.h @@ -0,0 +1,30 @@ +/* + * ECDH params to be used with kpp API + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ECDH_ +#define _CRYPTO_ECDH_ + +/* Curves IDs */ +#define ECC_CURVE_NIST_P192 0x0001 +#define ECC_CURVE_NIST_P256 0x0002 + +struct ecdh { + unsigned short curve_id; + char *key; + unsigned short key_size; +}; + +int crypto_ecdh_key_len(const struct ecdh *params); +int crypto_ecdh_encode_key(char *buf, unsigned int len, const struct ecdh *p); +int crypto_ecdh_decode_key(const char *buf, unsigned int len, struct ecdh *p); + +#endif diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 937ac122354a3..30791f75c180d 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -243,6 +243,7 @@ static inline void kpp_request_set_output(struct kpp_request *req, enum { CRYPTO_KPP_SECRET_TYPE_UNKNOWN, CRYPTO_KPP_SECRET_TYPE_DH, + CRYPTO_KPP_SECRET_TYPE_ECDH, }; /** From e93f767bec1c4b58ac24bd59143c0030b9555426 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 22 Jun 2016 16:23:34 +0300 Subject: [PATCH 080/180] crypto: omap-sham - use runtime_pm autosuspend for clock handling Calling runtime PM API for every block causes serious performance hit to crypto operations that are done on a long buffer. As crypto is performed on a page boundary, encrypting large buffers can cause a series of crypto operations divided by page. The runtime PM API is also called those many times. Convert the driver to use runtime_pm autosuspend instead, with a default timeout value of 1 second. This results in upto ~50% speedup. Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 63464e86f2b1b..887bc32b79938 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -100,6 +100,8 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ +#define DEFAULT_AUTOSUSPEND_DELAY 1000 + /* mostly device flags */ #define FLAGS_BUSY 0 #define FLAGS_FINAL 1 @@ -999,7 +1001,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_put(dd->dev); + pm_runtime_mark_last_busy(dd->dev); + pm_runtime_put_autosuspend(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -1946,6 +1949,9 @@ static int omap_sham_probe(struct platform_device *pdev) dd->flags |= dd->pdata->flags; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); pm_runtime_irq_safe(dev); From 65e7a549af295cb2034f17e99211b97e9d02cbee Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 22 Jun 2016 16:23:35 +0300 Subject: [PATCH 081/180] crypto: omap-sham - change queue size from 1 to 10 Change crypto queue size from 1 to 10 for omap SHA driver. This should allow clients to enqueue requests more effectively to avoid serializing whole crypto sequences, giving extra performance. Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 887bc32b79938..8090db0de4668 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -175,7 +175,7 @@ struct omap_sham_ctx { struct omap_sham_hmac_ctx base[0]; }; -#define OMAP_SHAM_QUEUE_LENGTH 1 +#define OMAP_SHAM_QUEUE_LENGTH 10 struct omap_sham_algs_info { struct ahash_alg *algs_list; From b973eaab68db858cb42f5283b1b0ed6773d8fdd9 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 22 Jun 2016 16:23:36 +0300 Subject: [PATCH 082/180] crypto: omap - do not call dmaengine_terminate_all The extra call to dmaengine_terminate_all is not needed, as the DMA is not running at this point. This improves performance slightly. Signed-off-by: Lokesh Vutla Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 2 -- drivers/crypto/omap-sham.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 4a0e6a545ba26..8178632de788f 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -528,8 +528,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_dma_stop(dd); - dmaengine_terminate_all(dd->dma_lch_in); - dmaengine_terminate_all(dd->dma_lch_out); return 0; } diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 8090db0de4668..3321f003f4655 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -815,7 +815,6 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - dmaengine_terminate_all(dd->dma_lch); if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); From 85e0687f8fac9032681b163a17f806b52205922e Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 22 Jun 2016 16:23:37 +0300 Subject: [PATCH 083/180] crypto: omap-sham - set sw fallback to 240 bytes Adds software fallback support for small crypto requests. In these cases, it is undesirable to use DMA, as setting it up itself is rather heavy operation. Gives about 40% extra performance in ipsec usecase. Signed-off-by: Bin Liu [t-kristo@ti.com: dropped the extra traces, updated some comments on the code] Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 3321f003f4655..ae6f841f81ffa 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1095,7 +1095,7 @@ static int omap_sham_update(struct ahash_request *req) ctx->offset = 0; if (ctx->flags & BIT(FLAGS_FINUP)) { - if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) { /* * OMAP HW accel works only with buffers >= 9 * will switch to bypass in final() @@ -1151,9 +1151,13 @@ static int omap_sham_final(struct ahash_request *req) if (ctx->flags & BIT(FLAGS_ERROR)) return 0; /* uncompleted hash is not needed */ - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because ipad == block size */ - if ((ctx->digcnt + ctx->bufcnt) < 9) + /* + * OMAP HW accel works only with buffers >= 9. + * HMAC is always >= 9 because ipad == block size. + * If buffersize is less than 240, we use fallback SW encoding, + * as using DMA + HW in this case doesn't provide any benefit. + */ + if ((ctx->digcnt + ctx->bufcnt) < 240) return omap_sham_final_shash(req); else if (ctx->bufcnt) return omap_sham_enqueue(req, OP_FINAL); From d56d72c6a0612be14ccb455c92886d2cb102c2ab Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Jun 2016 22:13:53 +0800 Subject: [PATCH 084/180] KEYS: Use skcipher for big keys This patch replaces use of the obsolete blkcipher with skcipher. Signed-off-by: Herbert Xu Acked-by: David Howells --- security/keys/big_key.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 9e443fccad4cf..c0b3030b56348 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * Layout of key payload words. @@ -74,7 +75,7 @@ static const char big_key_alg_name[] = "ecb(aes)"; * Crypto algorithms for big_key data encryption */ static struct crypto_rng *big_key_rng; -static struct crypto_blkcipher *big_key_blkcipher; +static struct crypto_skcipher *big_key_skcipher; /* * Generate random key to encrypt big_key data @@ -91,22 +92,26 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) { int ret = -EINVAL; struct scatterlist sgio; - struct blkcipher_desc desc; + SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher); - if (crypto_blkcipher_setkey(big_key_blkcipher, key, ENC_KEY_SIZE)) { + if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) { ret = -EAGAIN; goto error; } - desc.flags = 0; - desc.tfm = big_key_blkcipher; + skcipher_request_set_tfm(req, big_key_skcipher); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); sg_init_one(&sgio, data, datalen); + skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL); if (op == BIG_KEY_ENC) - ret = crypto_blkcipher_encrypt(&desc, &sgio, &sgio, datalen); + ret = crypto_skcipher_encrypt(req); else - ret = crypto_blkcipher_decrypt(&desc, &sgio, &sgio, datalen); + ret = crypto_skcipher_decrypt(req); + + skcipher_request_zero(req); error: return ret; @@ -140,7 +145,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) * * File content is stored encrypted with randomly generated key. */ - size_t enclen = ALIGN(datalen, crypto_blkcipher_blocksize(big_key_blkcipher)); + size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); /* prepare aligned data to encrypt */ data = kmalloc(enclen, GFP_KERNEL); @@ -288,7 +293,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) struct file *file; u8 *data; u8 *enckey = (u8 *)key->payload.data[big_key_data]; - size_t enclen = ALIGN(datalen, crypto_blkcipher_blocksize(big_key_blkcipher)); + size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); data = kmalloc(enclen, GFP_KERNEL); if (!data) @@ -359,9 +364,10 @@ static int __init big_key_crypto_init(void) goto error; /* init block cipher */ - big_key_blkcipher = crypto_alloc_blkcipher(big_key_alg_name, 0, 0); - if (IS_ERR(big_key_blkcipher)) { - big_key_blkcipher = NULL; + big_key_skcipher = crypto_alloc_skcipher(big_key_alg_name, + 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(big_key_skcipher)) { + big_key_skcipher = NULL; ret = -EFAULT; goto error; } From b578456c342ecd4266dac96c87ca803602ea9c48 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 22 Jun 2016 19:26:06 +0200 Subject: [PATCH 085/180] crypto: jitterentropy - use ktime_get_ns as fallback As part of the Y2038 development, __getnstimeofday is not supposed to be used any more. It is now replaced with ktime_get_ns. The Jitter RNG uses the time stamp to measure the execution time of a given code path and tries to detect variations in the execution time. Therefore, the only requirement the Jitter RNG has, is a sufficient high resolution to detect these variations. The change was tested on x86 to show an identical behavior as RDTSC. The used test code simply measures the execution time of the heart of the RNG: jent_get_nstime(&time); jent_memaccess(ec, min); jent_fold_time(NULL, time, &folded, min); jent_get_nstime(&time2); return ((time2 - time)); Signed-off-by: Stephan Mueller Acked-by: Arnd Bergmann Signed-off-by: Herbert Xu --- crypto/jitterentropy-kcapi.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 597cedd3531c8..c4938497eedbe 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -87,24 +87,28 @@ void jent_memcpy(void *dest, const void *src, unsigned int n) memcpy(dest, src, n); } +/* + * Obtain a high-resolution time stamp value. The time stamp is used to measure + * the execution time of a given code path and its variations. Hence, the time + * stamp must have a sufficiently high resolution. + * + * Note, if the function returns zero because a given architecture does not + * implement a high-resolution time stamp, the RNG code's runtime test + * will detect it and will not produce output. + */ void jent_get_nstime(__u64 *out) { - struct timespec ts; __u64 tmp = 0; tmp = random_get_entropy(); /* - * If random_get_entropy does not return a value (which is possible on, - * for example, MIPS), invoke __getnstimeofday + * If random_get_entropy does not return a value, i.e. it is not + * implemented for a given architecture, use a clock source. * hoping that there are timers we can work with. */ - if ((0 == tmp) && - (0 == __getnstimeofday(&ts))) { - tmp = ts.tv_sec; - tmp = tmp << 32; - tmp = tmp | ts.tv_nsec; - } + if (tmp == 0) + tmp = ktime_get_ns(); *out = tmp; } From 52140993d4fccde6004df9e282a81580d9d5e4da Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 Jun 2016 17:27:02 -0700 Subject: [PATCH 086/180] hwrng: bcm2835 - Add support for Broadcom BCM5301x The Broadcom BCM5301x SoCs (Northstar) utilize the same random number generator peripheral as Northstar Plus and BCM2835, but just like the NSP SoC, we need to enable the interrupt. Signed-off-by: Florian Fainelli Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- drivers/char/hw_random/bcm2835-rng.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 51401100466ba..56ad5a5936a9a 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -90,7 +90,7 @@ config HW_RANDOM_BCM63XX config HW_RANDOM_BCM2835 tristate "Broadcom BCM2835 Random Number Generator support" - depends on ARCH_BCM2835 || ARCH_BCM_NSP + depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index 75ca820730be8..af2149273fe04 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -70,6 +70,7 @@ static struct hwrng bcm2835_rng_ops = { static const struct of_device_id bcm2835_rng_of_match[] = { { .compatible = "brcm,bcm2835-rng"}, { .compatible = "brcm,bcm-nsp-rng", .data = nsp_rng_init}, + { .compatible = "brcm,bcm5301x-rng", .data = nsp_rng_init}, {}, }; From 8f44df154de79a61b0e86734f51737b8cccf8dfe Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 24 Jun 2016 16:20:22 +1000 Subject: [PATCH 087/180] crypto: ecdh - make ecdh_shared_secret unique There is another ecdh_shared_secret in net/bluetooth/ecc.c Fixes: 3c4b23901a0c ("crypto: ecdh - Add ECDH software support") Signed-off-by: Stephen Rothwell Signed-off-by: Herbert Xu --- crypto/ecc.c | 2 +- crypto/ecc.h | 6 +++--- crypto/ecdh.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 9aedec6bbe728..414c78a9c2143 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -966,7 +966,7 @@ int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits, return ret; } -int ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, +int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, const u8 *private_key, unsigned int private_key_len, const u8 *public_key, unsigned int public_key_len, u8 *secret, unsigned int secret_len) diff --git a/crypto/ecc.h b/crypto/ecc.h index b5db4b989f3c5..663d598c7406c 100644 --- a/crypto/ecc.h +++ b/crypto/ecc.h @@ -60,7 +60,7 @@ int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits, u8 *public_key, unsigned int public_key_len); /** - * ecdh_shared_secret() - Compute a shared secret + * crypto_ecdh_shared_secret() - Compute a shared secret * * @curve_id: id representing the curve to use * @private_key: private key of part A @@ -70,13 +70,13 @@ int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits, * @secret: buffer for storing the calculated shared secret * @secret_len: length of the secret buffer * - * Note: It is recommended that you hash the result of ecdh_shared_secret + * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret * before using it for symmetric encryption or HMAC. * * Returns 0 if the shared secret was generated successfully, a negative value * if an error occurred. */ -int ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, +int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, const u8 *private_key, unsigned int private_key_len, const u8 *public_key, unsigned int public_key_len, u8 *secret, unsigned int secret_len); diff --git a/crypto/ecdh.c b/crypto/ecdh.c index d3a9eeca4b326..3de289806d678 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -79,7 +79,7 @@ static int ecdh_compute_value(struct kpp_request *req) if (copied != 2 * nbytes) return -EINVAL; - ret = ecdh_shared_secret(ctx->curve_id, ctx->ndigits, + ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits, (const u8 *)ctx->private_key, nbytes, (const u8 *)ctx->public_key, 2 * nbytes, (u8 *)ctx->shared_secret, nbytes); From ab1778752921800468057d996f9a3bb9426870c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 Jun 2016 17:27:01 -0700 Subject: [PATCH 088/180] Documentation: devicetree: bindings: Add BCM5301x binding Document the binding used by the Broadcom BCM5301x (Northstar) SoC random number generator. Signed-off-by: Florian Fainelli Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/brcm,bcm2835.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt index aa304d4120588..26542690b5789 100644 --- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt +++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt @@ -2,7 +2,8 @@ BCM2835 Random number generator Required properties: -- compatible : should be "brcm,bcm2835-rng" or "brcm,bcm-nsp-rng" +- compatible : should be "brcm,bcm2835-rng" or "brcm,bcm-nsp-rng" or + "brcm,bcm5301x-rng" - reg : Specifies base physical address and size of the registers. Example: From f876f440df3973ab7f1d20e3d34d000fc9422a78 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:42 -0700 Subject: [PATCH 089/180] crypto: sha256-mb - SHA256 multibuffer job manager and glue code This patch introduces the multi-buffer job manager which is responsible for submitting scatter-gather buffers from several SHA256 jobs to the multi-buffer algorithm. It also contains the flush routine to that's called by the crypto daemon to complete the job when no new jobs arrive before the deadline of maximum latency of a SHA256 crypto job. The SHA256 multi-buffer crypto algorithm is defined and initialized in this patch. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 1 + arch/x86/crypto/sha256-mb/Makefile | 11 + arch/x86/crypto/sha256-mb/sha256_mb.c | 1027 +++++++++++++++++++++++++ 3 files changed, 1039 insertions(+) create mode 100644 arch/x86/crypto/sha256-mb/Makefile create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b9b912a44d61b..aafff22b0ea61 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -50,6 +50,7 @@ ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ + obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile new file mode 100644 index 0000000000000..41089e7c400c3 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/Makefile @@ -0,0 +1,11 @@ +# +# Arch-specific CryptoAPI modules. +# + +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o + sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \ + sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o +endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c new file mode 100644 index 0000000000000..c9d5dcc81c96a --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -0,0 +1,1027 @@ +/* + * Multi buffer SHA256 algorithm Glue Code + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sha256_mb_ctx.h" + +#define FLUSH_INTERVAL 1000 /* in usec */ + +static struct mcryptd_alg_state sha256_mb_alg_state; + +struct sha256_mb_ctx { + struct mcryptd_ahash *mcryptd_tfm; +}; + +static inline struct mcryptd_hash_request_ctx + *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx) +{ + struct ahash_request *areq; + + areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); + return container_of(areq, struct mcryptd_hash_request_ctx, areq); +} + +static inline struct ahash_request + *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +{ + return container_of((void *) ctx, struct ahash_request, __ctx); +} + +static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, + struct ahash_request *areq) +{ + rctx->flag = HASH_UPDATE; +} + +static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state); +static asmlinkage struct job_sha256* (*sha256_job_mgr_submit) + (struct sha256_mb_mgr *state, struct job_sha256 *job); +static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) + (struct sha256_mb_mgr *state); +static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) + (struct sha256_mb_mgr *state); + +inline void sha256_init_digest(uint32_t *digest) +{ + static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}; + memcpy(digest, initial_digest, sizeof(initial_digest)); +} + +inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], + uint32_t total_len) +{ + uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); + + memset(&padblock[i], 0, SHA256_BLOCK_SIZE); + padblock[i] = 0x80; + + i += ((SHA256_BLOCK_SIZE - 1) & + (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1))) + + 1 + SHA256_PADLENGTHFIELD_SIZE; + +#if SHA256_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) &padblock[i - 16]) = 0; +#endif + + *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); + + /* Number of extra blocks to hash */ + return i >> SHA256_LOG2_BLOCK_SIZE; +} + +static struct sha256_hash_ctx + *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr, + struct sha256_hash_ctx *ctx) +{ + while (ctx) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { + /* Clear PROCESSING bit */ + ctx->status = HASH_CTX_STS_COMPLETE; + return ctx; + } + + /* + * If the extra blocks are empty, begin hashing what remains + * in the user's buffer. + */ + if (ctx->partial_block_buffer_length == 0 && + ctx->incoming_buffer_length) { + + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + uint32_t copy_len; + + /* + * Only entire blocks can be hashed. + * Copy remainder to extra blocks buffer. + */ + copy_len = len & (SHA256_BLOCK_SIZE-1); + + if (copy_len) { + len -= copy_len; + memcpy(ctx->partial_block_buffer, + ((const char *) buffer + len), + copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + /* len should be a multiple of the block size now */ + assert((len % SHA256_BLOCK_SIZE) == 0); + + /* Set len to the number of blocks to be hashed */ + len >>= SHA256_LOG2_BLOCK_SIZE; + + if (len) { + + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + } + + /* + * If the extra blocks are not empty, then we are + * either on the last block(s) or we need more + * user input before continuing. + */ + if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = + sha256_pad(buf, ctx->total_length); + + ctx->status = (HASH_CTX_STS_PROCESSING | + HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static struct sha256_hash_ctx + *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr) +{ + /* + * If get_comp_job returns NULL, there are no jobs complete. + * If get_comp_job returns a job, verify that it is safe to return to + * the user. If it is not ready, resubmit the job to finish processing. + * If sha256_ctx_mgr_resubmit returned a job, it is ready to be + * returned. Otherwise, all jobs currently being managed by the + * hash_ctx_mgr still need processing. + */ + struct sha256_hash_ctx *ctx; + + ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr); + return sha256_ctx_mgr_resubmit(mgr, ctx); +} + +static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr) +{ + sha256_job_mgr_init(&mgr->mgr); +} + +static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, + struct sha256_hash_ctx *ctx, + const void *buffer, + uint32_t len, + int flags) +{ + if (flags & (~HASH_ENTIRE)) { + /* User should not pass anything other than FIRST, UPDATE + * or LAST + */ + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + /* Cannot submit to a currently processing job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + /* Cannot update a finished job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + /* Init digest */ + sha256_init_digest(ctx->job.result_digest); + + /* Reset byte counter */ + ctx->total_length = 0; + + /* Clear extra blocks */ + ctx->partial_block_buffer_length = 0; + } + + /* If we made it here, there was no error during this call to submit */ + ctx->error = HASH_CTX_ERROR_NONE; + + /* Store buffer ptr info from user */ + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + /* Store the user's request flags and mark this ctx as currently + * being processed. + */ + ctx->status = (flags & HASH_LAST) ? + (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + /* Advance byte counter */ + ctx->total_length += len; + + /* + * If there is anything currently buffered in the extra blocks, + * append to it until it contains a whole block. + * Or if the user's buffer contains less than a whole block, + * append as much as possible to the extra block. + */ + if ((ctx->partial_block_buffer_length) | (len < SHA256_BLOCK_SIZE)) { + /* Compute how many bytes to copy from user buffer into + * extra block + */ + uint32_t copy_len = SHA256_BLOCK_SIZE - + ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + /* Copy and update relevant pointers and counters */ + memcpy( + &ctx->partial_block_buffer[ctx->partial_block_buffer_length], + buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *) + ((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + + /* The extra block should never contain more than 1 block */ + assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); + + /* If the extra block buffer contains exactly 1 block, + * it can be hashed. + */ + if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_submit(&mgr->mgr, &ctx->job); + } + } + + return sha256_ctx_mgr_resubmit(mgr, ctx); +} + +static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr) +{ + struct sha256_hash_ctx *ctx; + + while (1) { + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_flush(&mgr->mgr); + + /* If flush returned 0, there are no more jobs in flight. */ + if (!ctx) + return NULL; + + /* + * If flush returned a job, resubmit the job to finish + * processing. + */ + ctx = sha256_ctx_mgr_resubmit(mgr, ctx); + + /* + * If sha256_ctx_mgr_resubmit returned a job, it is ready to + * be returned. Otherwise, all jobs currently being managed by + * the sha256_ctx_mgr still need processing. Loop. + */ + if (ctx) + return ctx; + } +} + +static int sha256_mb_init(struct ahash_request *areq) +{ + struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); + + hash_ctx_init(sctx); + sctx->job.result_digest[0] = SHA256_H0; + sctx->job.result_digest[1] = SHA256_H1; + sctx->job.result_digest[2] = SHA256_H2; + sctx->job.result_digest[3] = SHA256_H3; + sctx->job.result_digest[4] = SHA256_H4; + sctx->job.result_digest[5] = SHA256_H5; + sctx->job.result_digest[6] = SHA256_H6; + sctx->job.result_digest[7] = SHA256_H7; + sctx->total_length = 0; + sctx->partial_block_buffer_length = 0; + sctx->status = HASH_CTX_STS_IDLE; + + return 0; +} + +static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx) +{ + int i; + struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); + __be32 *dst = (__be32 *) rctx->out; + + for (i = 0; i < 8; ++i) + dst[i] = cpu_to_be32(sctx->job.result_digest[i]); + + return 0; +} + +static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, + struct mcryptd_alg_cstate *cstate, bool flush) +{ + int flag = HASH_UPDATE; + int nbytes, err = 0; + struct mcryptd_hash_request_ctx *rctx = *ret_rctx; + struct sha256_hash_ctx *sha_ctx; + + /* more work ? */ + while (!(rctx->flag & HASH_DONE)) { + nbytes = crypto_ahash_walk_done(&rctx->walk, 0); + if (nbytes < 0) { + err = nbytes; + goto out; + } + /* check if the walk is done */ + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + if (rctx->flag & HASH_FINAL) + flag |= HASH_LAST; + + } + sha_ctx = (struct sha256_hash_ctx *) + ahash_request_ctx(&rctx->areq); + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, + rctx->walk.data, nbytes, flag); + if (!sha_ctx) { + if (flush) + sha_ctx = sha256_ctx_mgr_flush(cstate->mgr); + } + kernel_fpu_end(); + if (sha_ctx) + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + else { + rctx = NULL; + goto out; + } + } + + /* copy the results */ + if (rctx->flag & HASH_FINAL) + sha256_mb_set_results(rctx); + +out: + *ret_rctx = rctx; + return err; +} + +static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate, + int err) +{ + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha256_hash_ctx *sha_ctx; + struct mcryptd_hash_request_ctx *req_ctx; + int ret; + + /* remove from work list */ + spin_lock(&cstate->work_lock); + list_del(&rctx->waiter); + spin_unlock(&cstate->work_lock); + + if (irqs_disabled()) + rctx->complete(&req->base, err); + else { + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + } + + /* check to see if there are other jobs that are done */ + sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); + while (sha_ctx) { + req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&req_ctx, cstate, false); + if (req_ctx) { + spin_lock(&cstate->work_lock); + list_del(&req_ctx->waiter); + spin_unlock(&cstate->work_lock); + + req = cast_mcryptd_ctx_to_req(req_ctx); + if (irqs_disabled()) + rctx->complete(&req->base, ret); + else { + local_bh_disable(); + rctx->complete(&req->base, ret); + local_bh_enable(); + } + } + sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); + } + + return 0; +} + +static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate) +{ + unsigned long next_flush; + unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + + /* initialize tag */ + rctx->tag.arrival = jiffies; /* tag the arrival time */ + rctx->tag.seq_num = cstate->next_seq_num++; + next_flush = rctx->tag.arrival + delay; + rctx->tag.expire = next_flush; + + spin_lock(&cstate->work_lock); + list_add_tail(&rctx->waiter, &cstate->work_list); + spin_unlock(&cstate->work_lock); + + mcryptd_arm_flusher(cstate, delay); +} + +static int sha256_mb_update(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha256_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha256_hash_ctx *sha_ctx; + int ret = 0, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) + rctx->flag |= HASH_DONE; + + /* submit */ + sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); + sha256_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, HASH_UPDATE); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha256_mb_finup(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha256_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha256_hash_ctx *sha_ctx; + int ret = 0, flag = HASH_UPDATE, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + flag = HASH_LAST; + } + + /* submit */ + rctx->flag |= HASH_FINAL; + sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); + sha256_mb_add_list(rctx, cstate); + + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, flag); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha256_mb_final(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha256_mb_alg_state.alg_cstate); + + struct sha256_hash_ctx *sha_ctx; + int ret = 0; + u8 data; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + rctx->flag |= HASH_DONE | HASH_FINAL; + + sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); + /* flag HASH_FINAL and 0 data size */ + sha256_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, + HASH_LAST); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha256_mb_export(struct ahash_request *areq, void *out) +{ + struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha256_mb_import(struct ahash_request *areq, const void *in) +{ + struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; + + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha256_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + sizeof(struct sha256_hash_ctx)); + + return 0; +} + +static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha256_mb_areq_alg = { + .init = sha256_mb_init, + .update = sha256_mb_update, + .final = sha256_mb_final, + .finup = sha256_mb_finup, + .export = sha256_mb_export, + .import = sha256_mb_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_hash_ctx), + .base = { + .cra_name = "__sha256-mb", + .cra_driver_name = "__intel_sha256-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread + * sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha256_mb_areq_alg.halg.base.cra_list), + .cra_init = sha256_mb_areq_init_tfm, + .cra_exit = sha256_mb_areq_exit_tfm, + .cra_ctxsize = sizeof(struct sha256_hash_ctx), + } + } +}; + +static int sha256_mb_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_init(mcryptd_req); +} + +static int sha256_mb_async_update(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_update(mcryptd_req); +} + +static int sha256_mb_async_finup(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_finup(mcryptd_req); +} + +static int sha256_mb_async_final(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_final(mcryptd_req); +} + +static int sha256_mb_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_digest(mcryptd_req); +} + +static int sha256_mb_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_export(mcryptd_req, out); +} + +static int sha256_mb_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); + struct mcryptd_hash_request_ctx *rctx; + struct ahash_request *areq; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + rctx = ahash_request_ctx(mcryptd_req); + areq = &rctx->areq; + + ahash_request_set_tfm(areq, child); + ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req); + + return crypto_ahash_import(mcryptd_req, in); +} + +static struct ahash_alg sha256_mb_async_alg = { + .init = sha256_mb_async_init, + .update = sha256_mb_async_update, + .final = sha256_mb_async_final, + .finup = sha256_mb_async_finup, + .export = sha256_mb_async_export, + .import = sha256_mb_async_import, + .digest = sha256_mb_async_digest, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_hash_ctx), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256_mb", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha256_mb_async_alg.halg.base.cra_list), + .cra_init = sha256_mb_async_init_tfm, + .cra_exit = sha256_mb_async_exit_tfm, + .cra_ctxsize = sizeof(struct sha256_mb_ctx), + .cra_alignmask = 0, + }, + }, +}; + +static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate) +{ + struct mcryptd_hash_request_ctx *rctx; + unsigned long cur_time; + unsigned long next_flush = 0; + struct sha256_hash_ctx *sha_ctx; + + + cur_time = jiffies; + + while (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + if (time_before(cur_time, rctx->tag.expire)) + break; + kernel_fpu_begin(); + sha_ctx = (struct sha256_hash_ctx *) + sha256_ctx_mgr_flush(cstate->mgr); + kernel_fpu_end(); + if (!sha_ctx) { + pr_err("sha256_mb error: nothing got" + " flushed for non-empty list\n"); + break; + } + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + sha_finish_walk(&rctx, cstate, true); + sha_complete_job(rctx, cstate, 0); + } + + if (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + /* get the hash context and then flush time */ + next_flush = rctx->tag.expire; + mcryptd_arm_flusher(cstate, get_delay(next_flush)); + } + return next_flush; +} + +static int __init sha256_mb_mod_init(void) +{ + + int cpu; + int err; + struct mcryptd_alg_cstate *cpu_state; + + /* check for dependent cpu features */ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_BMI2)) + return -ENODEV; + + /* initialize multibuffer structures */ + sha256_mb_alg_state.alg_cstate = alloc_percpu + (struct mcryptd_alg_cstate); + + sha256_job_mgr_init = sha256_mb_mgr_init_avx2; + sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2; + sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2; + sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2; + + if (!sha256_mb_alg_state.alg_cstate) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); + cpu_state->next_flush = 0; + cpu_state->next_seq_num = 0; + cpu_state->flusher_engaged = false; + INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); + cpu_state->cpu = cpu; + cpu_state->alg_state = &sha256_mb_alg_state; + cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr), + GFP_KERNEL); + if (!cpu_state->mgr) + goto err2; + sha256_ctx_mgr_init(cpu_state->mgr); + INIT_LIST_HEAD(&cpu_state->work_list); + spin_lock_init(&cpu_state->work_lock); + } + sha256_mb_alg_state.flusher = &sha256_mb_flusher; + + err = crypto_register_ahash(&sha256_mb_areq_alg); + if (err) + goto err2; + err = crypto_register_ahash(&sha256_mb_async_alg); + if (err) + goto err1; + + + return 0; +err1: + crypto_unregister_ahash(&sha256_mb_areq_alg); +err2: + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha256_mb_alg_state.alg_cstate); + return -ENODEV; +} + +static void __exit sha256_mb_mod_fini(void) +{ + int cpu; + struct mcryptd_alg_cstate *cpu_state; + + crypto_unregister_ahash(&sha256_mb_async_alg); + crypto_unregister_ahash(&sha256_mb_areq_alg); + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha256_mb_alg_state.alg_cstate); +} + +module_init(sha256_mb_mod_init); +module_exit(sha256_mb_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated"); + +MODULE_ALIAS_CRYPTO("sha256"); From 9be7e24483998fa6a34c2b191c4798b8189f8f9e Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:43 -0700 Subject: [PATCH 090/180] crypto: sha256-mb - Enable multibuffer support Add the config CRYPTO_SHA256_MB which will enable the computation using the SHA256 multi-buffer algorithm. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/Kconfig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index 5baaa9d87574c..d8cc0f0852786 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -704,6 +704,22 @@ config CRYPTO_SHA1_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. +config CRYPTO_SHA256_MB + tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_SHA256 + select CRYPTO_HASH + select CRYPTO_MCRYPTD + help + SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using multi-buffer technique. This algorithm computes on + multiple data lanes concurrently with SIMD instructions for + better throughput. It should not be enabled by default but + used when there is significant amount of work to keep the keep + the data lanes filled to get performance benefit. If the data + lanes remain unfilled, a flush operation will be initiated to + process the crypto jobs, adding a slight latency. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH From a377c6b1876e7ac847a124998e828baf9d8c89c2 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:44 -0700 Subject: [PATCH 091/180] crypto: sha256-mb - submit/flush routines for AVX2 This patch introduces the routines used to submit and flush buffers belonging to SHA256 crypto jobs to the SHA256 multibuffer algorithm. It is implemented mostly in assembly optimized with AVX2 instructions. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- .../sha256-mb/sha256_mb_mgr_flush_avx2.S | 304 ++++++++++++++++++ .../sha256-mb/sha256_mb_mgr_init_avx2.c | 65 ++++ .../sha256-mb/sha256_mb_mgr_submit_avx2.S | 215 +++++++++++++ 3 files changed, 584 insertions(+) create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S new file mode 100644 index 0000000000000..b691da981cd9c --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -0,0 +1,304 @@ +/* + * Flush routine for SHA256 multibuffer + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include "sha256_mb_mgr_datastruct.S" + +.extern sha256_x8_avx2 + +#LINUX register definitions +#define arg1 %rdi +#define arg2 %rsi + +# Common register definitions +#define state arg1 +#define job arg2 +#define len2 arg2 + +# idx must be a register not clobberred by sha1_mult +#define idx %r8 +#define DWORD_idx %r8d + +#define unused_lanes %rbx +#define lane_data %rbx +#define tmp2 %rbx +#define tmp2_w %ebx + +#define job_rax %rax +#define tmp1 %rax +#define size_offset %rax +#define tmp %rax +#define start_offset %rax + +#define tmp3 %arg1 + +#define extra_blocks %arg2 +#define p %arg2 + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JNE_SKIP i +jne skip_\i +.endm + +.altmacro +.macro SET_OFFSET _offset +offset = \_offset +.endm +.noaltmacro + +# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state) +# arg 1 : rcx : state +ENTRY(sha256_mb_mgr_flush_avx2) + FRAME_BEGIN + push %rbx + + # If bit (32+3) is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $32+3, unused_lanes + jc return_null + + # find a lane with a non-null job + xor idx, idx + offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne one(%rip), idx + offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne two(%rip), idx + offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne three(%rip), idx + offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne four(%rip), idx + offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne five(%rip), idx + offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne six(%rip), idx + offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne seven(%rip), idx + + # copy idx to empty lanes +copy_lane_data: + offset = (_args + _data_ptr) + mov offset(state,idx,8), tmp + + I = 0 +.rep 8 + offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) +.altmacro + JNE_SKIP %I + offset = (_args + _data_ptr + 8*I) + mov tmp, offset(state) + offset = (_lens + 4*I) + movl $0xFFFFFFFF, offset(state) +LABEL skip_ %I + I = (I+1) +.noaltmacro +.endr + + # Find min length + vmovdqa _lens+0*16(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens+0*16(state) + vmovdqa %xmm1, _lens+1*16(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha256_x8_avx2 + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + + mov unused_lanes, _unused_lanes(state) + movl $0xFFFFFFFF, _lens(state,idx,4) + + vmovd _args_digest(state , idx, 4) , %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + vmovd _args_digest+4*32(state, idx, 4), %xmm1 + vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 + + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + vmovdqu %xmm1, offset(job_rax) + +return: + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha256_mb_mgr_flush_avx2) + +############################################################################## + +.align 16 +ENTRY(sha256_mb_mgr_get_comp_job_avx2) + push %rbx + + ## if bit 32+3 is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $(32+3), unused_lanes + jc .return_null + + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword + + vmovd %xmm2, DWORD_idx + test $~0xF, idx + jnz .return_null + + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + movl _args_digest+4*32(state, idx, 4), tmp2_w + vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 + + vmovdqu %xmm0, _result_digest(job_rax) + movl tmp2_w, _result_digest+1*16(job_rax) + + pop %rbx + + ret + +.return_null: + xor job_rax, job_rax + pop %rbx + ret +ENDPROC(sha256_mb_mgr_get_comp_job_avx2) + +.data + +.align 16 +clear_low_nibble: +.octa 0x000000000000000000000000FFFFFFF0 +one: +.quad 1 +two: +.quad 2 +three: +.quad 3 +four: +.quad 4 +five: +.quad 5 +six: +.quad 6 +seven: +.quad 7 diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c new file mode 100644 index 0000000000000..b0c498371e671 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c @@ -0,0 +1,65 @@ +/* + * Initialization code for multi buffer SHA256 algorithm for AVX2 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sha256_mb_mgr.h" + +void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state) +{ + unsigned int j; + + state->unused_lanes = 0xF76543210ULL; + for (j = 0; j < 8; j++) { + state->lens[j] = 0xFFFFFFFF; + state->ldata[j].job_in_lane = NULL; + } +} diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S new file mode 100644 index 0000000000000..7ea670e25acc8 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S @@ -0,0 +1,215 @@ +/* + * Buffer submit code for multi buffer SHA256 algorithm + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sha256_mb_mgr_datastruct.S" + +.extern sha256_x8_avx2 + +# LINUX register definitions +arg1 = %rdi +arg2 = %rsi +size_offset = %rcx +tmp2 = %rcx +extra_blocks = %rdx + +# Common definitions +#define state arg1 +#define job %rsi +#define len2 arg2 +#define p2 arg2 + +# idx must be a register not clobberred by sha1_x8_avx2 +idx = %r8 +DWORD_idx = %r8d +last_len = %r8 + +p = %r11 +start_offset = %r11 + +unused_lanes = %rbx +BYTE_unused_lanes = %bl + +job_rax = %rax +len = %rax +DWORD_len = %eax + +lane = %r12 +tmp3 = %r12 + +tmp = %r9 +DWORD_tmp = %r9d + +lane_data = %r10 + +# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job) +# arg 1 : rcx : state +# arg 2 : rdx : job +ENTRY(sha256_mb_mgr_submit_avx2) + FRAME_BEGIN + push %rbx + push %r12 + + mov _unused_lanes(state), unused_lanes + mov unused_lanes, lane + and $0xF, lane + shr $4, unused_lanes + imul $_LANE_DATA_size, lane, lane_data + movl $STS_BEING_PROCESSED, _status(job) + lea _ldata(state, lane_data), lane_data + mov unused_lanes, _unused_lanes(state) + movl _len(job), DWORD_len + + mov job, _job_in_lane(lane_data) + shl $4, len + or lane, len + + movl DWORD_len, _lens(state , lane, 4) + + # Load digest words from result_digest + vmovdqu _result_digest(job), %xmm0 + vmovdqu _result_digest+1*16(job), %xmm1 + vmovd %xmm0, _args_digest(state, lane, 4) + vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) + vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) + vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) + vmovd %xmm1, _args_digest+4*32(state , lane, 4) + + vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4) + vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4) + vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4) + + mov _buffer(job), p + mov p, _args_data_ptr(state, lane, 8) + + cmp $0xF, unused_lanes + jne return_null + +start_loop: + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens + 0*16(state) + vmovdqa %xmm1, _lens + 1*16(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha256_x8_avx2 + + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + mov _unused_lanes(state), unused_lanes + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state,idx,4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 + vmovd _args_digest+4*32(state, idx, 4), %xmm1 + + vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1 + vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1 + vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1 + + vmovdqu %xmm0, _result_digest(job_rax) + vmovdqu %xmm1, _result_digest+1*16(job_rax) + +return: + pop %r12 + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return + +ENDPROC(sha256_mb_mgr_submit_avx2) + +.data + +.align 16 +clear_low_nibble: + .octa 0x000000000000000000000000FFFFFFF0 From 98cf10383a5507147793789bc5c2c02688df44b2 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:45 -0700 Subject: [PATCH 092/180] crypto: sha256-mb - Algorithm data structures This patch introduces the data structures and prototypes of functions needed for computing SHA256 hash using multi-buffer. Included are the structures of the multi-buffer SHA256 job, job scheduler in C and x86 assembly. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-mb/sha256_mb_ctx.h | 136 ++++++++ arch/x86/crypto/sha256-mb/sha256_mb_mgr.h | 108 +++++++ .../sha256-mb/sha256_mb_mgr_datastruct.S | 304 ++++++++++++++++++ 3 files changed, 548 insertions(+) create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_ctx.h create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr.h create mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h new file mode 100644 index 0000000000000..edd252b732067 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h @@ -0,0 +1,136 @@ +/* + * Header file for multi buffer SHA256 context + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SHA_MB_CTX_INTERNAL_H +#define _SHA_MB_CTX_INTERNAL_H + +#include "sha256_mb_mgr.h" + +#define HASH_UPDATE 0x00 +#define HASH_FIRST 0x01 +#define HASH_LAST 0x02 +#define HASH_ENTIRE 0x03 +#define HASH_DONE 0x04 +#define HASH_FINAL 0x08 + +#define HASH_CTX_STS_IDLE 0x00 +#define HASH_CTX_STS_PROCESSING 0x01 +#define HASH_CTX_STS_LAST 0x02 +#define HASH_CTX_STS_COMPLETE 0x04 + +enum hash_ctx_error { + HASH_CTX_ERROR_NONE = 0, + HASH_CTX_ERROR_INVALID_FLAGS = -1, + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, + +#ifdef HASH_CTX_DEBUG + HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, +#endif +}; + + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while (0) + + +/* Hash Constants and Typedefs */ +#define SHA256_DIGEST_LENGTH 8 +#define SHA256_LOG2_BLOCK_SIZE 6 + +#define SHA256_PADLENGTHFIELD_SIZE 8 + +#ifdef SHA_MB_DEBUG +#define assert(expr) \ +do { \ + if (unlikely(!(expr))) { \ + printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) +#else +#define assert(expr) do {} while (0) +#endif + +struct sha256_ctx_mgr { + struct sha256_mb_mgr mgr; +}; + +/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */ + +struct sha256_hash_ctx { + /* Must be at struct offset 0 */ + struct job_sha256 job; + /* status flag */ + int status; + /* error flag */ + int error; + + uint32_t total_length; + const void *incoming_buffer; + uint32_t incoming_buffer_length; + uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; + uint32_t partial_block_buffer_length; + void *user_data; +}; + +#endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h new file mode 100644 index 0000000000000..b01ae408c56d7 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h @@ -0,0 +1,108 @@ +/* + * Header file for multi buffer SHA256 algorithm manager + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __SHA_MB_MGR_H +#define __SHA_MB_MGR_H + +#include + +#define NUM_SHA256_DIGEST_WORDS 8 + +enum job_sts { STS_UNKNOWN = 0, + STS_BEING_PROCESSED = 1, + STS_COMPLETED = 2, + STS_INTERNAL_ERROR = 3, + STS_ERROR = 4 +}; + +struct job_sha256 { + u8 *buffer; + u32 len; + u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32); + enum job_sts status; + void *user_data; +}; + +/* SHA256 out-of-order scheduler */ + +/* typedef uint32_t sha8_digest_array[8][8]; */ + +struct sha256_args_x8 { + uint32_t digest[8][8]; + uint8_t *data_ptr[8]; +}; + +struct sha256_lane_data { + struct job_sha256 *job_in_lane; +}; + +struct sha256_mb_mgr { + struct sha256_args_x8 args; + + uint32_t lens[8]; + + /* each byte is index (0...7) of unused lanes */ + uint64_t unused_lanes; + /* byte 4 is set to FF as a flag */ + struct sha256_lane_data ldata[8]; +}; + + +#define SHA256_MB_MGR_NUM_LANES_AVX2 8 + +void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state); +struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state, + struct job_sha256 *job); +struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state); +struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state); + +#endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S new file mode 100644 index 0000000000000..5c377bac21d0c --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S @@ -0,0 +1,304 @@ +/* + * Header file for multi buffer SHA256 algorithm data structure + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# Macros for defining data structures + +# Usage example + +#START_FIELDS # JOB_AES +### name size align +#FIELD _plaintext, 8, 8 # pointer to plaintext +#FIELD _ciphertext, 8, 8 # pointer to ciphertext +#FIELD _IV, 16, 8 # IV +#FIELD _keys, 8, 8 # pointer to keys +#FIELD _len, 4, 4 # length in bytes +#FIELD _status, 4, 4 # status enumeration +#FIELD _user_data, 8, 8 # pointer to user data +#UNION _union, size1, align1, \ +# size2, align2, \ +# size3, align3, \ +# ... +#END_FIELDS +#%assign _JOB_AES_size _FIELD_OFFSET +#%assign _JOB_AES_align _STRUCT_ALIGN + +######################################################################### + +# Alternate "struc-like" syntax: +# STRUCT job_aes2 +# RES_Q .plaintext, 1 +# RES_Q .ciphertext, 1 +# RES_DQ .IV, 1 +# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN +# RES_U .union, size1, align1, \ +# size2, align2, \ +# ... +# ENDSTRUCT +# # Following only needed if nesting +# %assign job_aes2_size _FIELD_OFFSET +# %assign job_aes2_align _STRUCT_ALIGN +# +# RES_* macros take a name, a count and an optional alignment. +# The count in in terms of the base size of the macro, and the +# default alignment is the base size. +# The macros are: +# Macro Base size +# RES_B 1 +# RES_W 2 +# RES_D 4 +# RES_Q 8 +# RES_DQ 16 +# RES_Y 32 +# RES_Z 64 +# +# RES_U defines a union. It's arguments are a name and two or more +# pairs of "size, alignment" +# +# The two assigns are only needed if this structure is being nested +# within another. Even if the assigns are not done, one can still use +# STRUCT_NAME_size as the size of the structure. +# +# Note that for nesting, you still need to assign to STRUCT_NAME_size. +# +# The differences between this and using "struc" directly are that each +# type is implicitly aligned to its natural length (although this can be +# over-ridden with an explicit third parameter), and that the structure +# is padded at the end to its overall alignment. +# + +######################################################################### + +#ifndef _DATASTRUCT_ASM_ +#define _DATASTRUCT_ASM_ + +#define SZ8 8*SHA256_DIGEST_WORD_SIZE +#define ROUNDS 64*SZ8 +#define PTR_SZ 8 +#define SHA256_DIGEST_WORD_SIZE 4 +#define MAX_SHA256_LANES 8 +#define SHA256_DIGEST_WORDS 8 +#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) +#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) +#define SHA256_BLK_SZ 64 + +# START_FIELDS +.macro START_FIELDS + _FIELD_OFFSET = 0 + _STRUCT_ALIGN = 0 +.endm + +# FIELD name size align +.macro FIELD name size align + _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + \name = _FIELD_OFFSET + _FIELD_OFFSET = _FIELD_OFFSET + (\size) +.if (\align > _STRUCT_ALIGN) + _STRUCT_ALIGN = \align +.endif +.endm + +# END_FIELDS +.macro END_FIELDS + _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +.endm + +######################################################################## + +.macro STRUCT p1 +START_FIELDS +.struc \p1 +.endm + +.macro ENDSTRUCT + tmp = _FIELD_OFFSET + END_FIELDS + tmp = (_FIELD_OFFSET - %%tmp) +.if (tmp > 0) + .lcomm tmp +.endif +.endstruc +.endm + +## RES_int name size align +.macro RES_int p1 p2 p3 + name = \p1 + size = \p2 + align = .\p3 + + _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) +.align align +.lcomm name size + _FIELD_OFFSET = _FIELD_OFFSET + (size) +.if (align > _STRUCT_ALIGN) + _STRUCT_ALIGN = align +.endif +.endm + +# macro RES_B name, size [, align] +.macro RES_B _name, _size, _align=1 +RES_int _name _size _align +.endm + +# macro RES_W name, size [, align] +.macro RES_W _name, _size, _align=2 +RES_int _name 2*(_size) _align +.endm + +# macro RES_D name, size [, align] +.macro RES_D _name, _size, _align=4 +RES_int _name 4*(_size) _align +.endm + +# macro RES_Q name, size [, align] +.macro RES_Q _name, _size, _align=8 +RES_int _name 8*(_size) _align +.endm + +# macro RES_DQ name, size [, align] +.macro RES_DQ _name, _size, _align=16 +RES_int _name 16*(_size) _align +.endm + +# macro RES_Y name, size [, align] +.macro RES_Y _name, _size, _align=32 +RES_int _name 32*(_size) _align +.endm + +# macro RES_Z name, size [, align] +.macro RES_Z _name, _size, _align=64 +RES_int _name 64*(_size) _align +.endm + +#endif + + +######################################################################## +#### Define SHA256 Out Of Order Data Structures +######################################################################## + +START_FIELDS # LANE_DATA +### name size align +FIELD _job_in_lane, 8, 8 # pointer to job object +END_FIELDS + + _LANE_DATA_size = _FIELD_OFFSET + _LANE_DATA_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # SHA256_ARGS_X4 +### name size align +FIELD _digest, 4*8*8, 4 # transposed digest +FIELD _data_ptr, 8*8, 8 # array of pointers to data +END_FIELDS + + _SHA256_ARGS_X4_size = _FIELD_OFFSET + _SHA256_ARGS_X4_align = _STRUCT_ALIGN + _SHA256_ARGS_X8_size = _FIELD_OFFSET + _SHA256_ARGS_X8_align = _STRUCT_ALIGN + +####################################################################### + +START_FIELDS # MB_MGR +### name size align +FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align +FIELD _lens, 4*8, 8 +FIELD _unused_lanes, 8, 8 +FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align +END_FIELDS + + _MB_MGR_size = _FIELD_OFFSET + _MB_MGR_align = _STRUCT_ALIGN + +_args_digest = _args + _digest +_args_data_ptr = _args + _data_ptr + +####################################################################### + +START_FIELDS #STACK_FRAME +### name size align +FIELD _data, 16*SZ8, 1 # transposed digest +FIELD _digest, 8*SZ8, 1 # array of pointers to data +FIELD _ytmp, 4*SZ8, 1 +FIELD _rsp, 8, 1 +END_FIELDS + + _STACK_FRAME_size = _FIELD_OFFSET + _STACK_FRAME_align = _STRUCT_ALIGN + +####################################################################### + +######################################################################## +#### Define constants +######################################################################## + +#define STS_UNKNOWN 0 +#define STS_BEING_PROCESSED 1 +#define STS_COMPLETED 2 + +######################################################################## +#### Define JOB_SHA256 structure +######################################################################## + +START_FIELDS # JOB_SHA256 + +### name size align +FIELD _buffer, 8, 8 # pointer to buffer +FIELD _len, 8, 8 # length in bytes +FIELD _result_digest, 8*4, 32 # Digest (output) +FIELD _status, 4, 4 +FIELD _user_data, 8, 8 +END_FIELDS + + _JOB_SHA256_size = _FIELD_OFFSET + _JOB_SHA256_align = _STRUCT_ALIGN From 992532474ffa954ff678627a1c0f815d7b6cd7fc Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:46 -0700 Subject: [PATCH 093/180] crypto: sha256-mb - Crypto computation (x8 AVX2) This patch introduces the assembly routines to do SHA256 computation on buffers belonging to several jobs at once. The assembly routines are optimized with AVX2 instructions that have 8 data lanes and using AVX2 registers. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-mb/sha256_x8_avx2.S | 593 +++++++++++++++++++++ 1 file changed, 593 insertions(+) create mode 100644 arch/x86/crypto/sha256-mb/sha256_x8_avx2.S diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S new file mode 100644 index 0000000000000..aa21aea4c722d --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S @@ -0,0 +1,593 @@ +/* + * Multi-buffer SHA256 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "sha256_mb_mgr_datastruct.S" + +## code to compute oct SHA256 using SSE-256 +## outer calling routine takes care of save and restore of XMM registers +## Logic designed/laid out by JDG + +## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15 +## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 +## Linux preserves: rdi rbp r8 +## +## clobbers %ymm0-15 + +arg1 = %rdi +arg2 = %rsi +reg3 = %rcx +reg4 = %rdx + +# Common definitions +STATE = arg1 +INP_SIZE = arg2 + +IDX = %rax +ROUND = %rbx +TBL = reg3 + +inp0 = %r9 +inp1 = %r10 +inp2 = %r11 +inp3 = %r12 +inp4 = %r13 +inp5 = %r14 +inp6 = %r15 +inp7 = reg4 + +a = %ymm0 +b = %ymm1 +c = %ymm2 +d = %ymm3 +e = %ymm4 +f = %ymm5 +g = %ymm6 +h = %ymm7 + +T1 = %ymm8 + +a0 = %ymm12 +a1 = %ymm13 +a2 = %ymm14 +TMP = %ymm15 +TMP0 = %ymm6 +TMP1 = %ymm7 + +TT0 = %ymm8 +TT1 = %ymm9 +TT2 = %ymm10 +TT3 = %ymm11 +TT4 = %ymm12 +TT5 = %ymm13 +TT6 = %ymm14 +TT7 = %ymm15 + +# Define stack usage + +# Assume stack aligned to 32 bytes before call +# Therefore FRAMESZ mod 32 must be 32-8 = 24 + +#define FRAMESZ 0x388 + +#define VMOVPS vmovups + +# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 +# "transpose" data in {r0...r7} using temps {t0...t1} +# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} +# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} +# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} +# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} +# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} +# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} +# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} +# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} +# +# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} +# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} +# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} +# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} +# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} +# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} +# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} +# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} +# + +.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 + # process top half (r0..r3) {a...d} + vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} + vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} + vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} + vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} + vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} + vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} + vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} + vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} + + # use r2 in place of t0 + # process bottom half (r4..r7) {e...h} + vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} + vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} + vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} + vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} + vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} + vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} + vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} + vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} + + vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 + vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 + vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 + vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 + vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 + vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 + vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 + vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 + +.endm + +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro _PRORD reg imm tmp + vpslld $(32-\imm),\reg,\tmp + vpsrld $\imm,\reg, \reg + vpor \tmp,\reg, \reg +.endm + +# PRORD_nd reg, imm, tmp, src +.macro _PRORD_nd reg imm tmp src + vpslld $(32-\imm), \src, \tmp + vpsrld $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +# PRORD dst/src, amt +.macro PRORD reg imm + _PRORD \reg,\imm,TMP +.endm + +# PRORD_nd dst, src, amt +.macro PRORD_nd reg tmp imm + _PRORD_nd \reg, \imm, TMP, \tmp +.endm + +# arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_00_15 _T1 i + PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) + + vpxor g, f, a2 # ch: a2 = f^g + vpand e,a2, a2 # ch: a2 = (f^g)&e + vpxor g, a2, a2 # a2 = ch + + PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) + + vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) + vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K + vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd a2, h, h # h = h + ch + PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) + vpaddd \_T1,h, h # h = h + ch + W + K + vpxor a1, a0, a0 # a0 = sigma1 + PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) + vpxor c, a, \_T1 # maj: T1 = a^c + add $SZ8, ROUND # ROUND++ + vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b + vpaddd a0, h, h + vpaddd h, d, d + vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) + PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a1, a2, a2 # a2 = sig0 + vpand c, a, a1 # maj: a1 = a&c + vpor \_T1, a1, a1 # a1 = maj + vpaddd a1, h, h # h = h + ch + W + K + maj + vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 + ROTATE_ARGS +.endm + +# arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_16_XX _T1 i + vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 + vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 + vmovdqu \_T1, a0 + PRORD \_T1,11 + vmovdqu a1, a2 + PRORD a1,2 + vpxor a0, \_T1, \_T1 + PRORD \_T1, 7 + vpxor a2, a1, a1 + PRORD a1, 17 + vpsrld $3, a0, a0 + vpxor a0, \_T1, \_T1 + vpsrld $10, a2, a2 + vpxor a2, a1, a1 + vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 + vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1 + vpaddd a1, \_T1, \_T1 + + ROUND_00_15 \_T1,\i +.endm + +# SHA256_ARGS: +# UINT128 digest[8]; // transposed digests +# UINT8 *data_ptr[4]; + +# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes); +# arg 1 : STATE : pointer to array of pointers to input data +# arg 2 : INP_SIZE : size of input in blocks + # general registers preserved in outer calling routine + # outer calling routine saves all the XMM registers + # save rsp, allocate 32-byte aligned for local variables +ENTRY(sha256_x8_avx2) + + # save callee-saved clobbered registers to comply with C function ABI + push %r12 + push %r13 + push %r14 + push %r15 + + mov %rsp, IDX + sub $FRAMESZ, %rsp + and $~0x1F, %rsp + mov IDX, _rsp(%rsp) + + # Load the pre-transposed incoming digest. + vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a + vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b + vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c + vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d + vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e + vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f + vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g + vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h + + lea K256_8(%rip),TBL + + # load the address of each of the 4 message lanes + # getting ready to transpose input onto stack + mov _args_data_ptr+0*PTR_SZ(STATE),inp0 + mov _args_data_ptr+1*PTR_SZ(STATE),inp1 + mov _args_data_ptr+2*PTR_SZ(STATE),inp2 + mov _args_data_ptr+3*PTR_SZ(STATE),inp3 + mov _args_data_ptr+4*PTR_SZ(STATE),inp4 + mov _args_data_ptr+5*PTR_SZ(STATE),inp5 + mov _args_data_ptr+6*PTR_SZ(STATE),inp6 + mov _args_data_ptr+7*PTR_SZ(STATE),inp7 + + xor IDX, IDX +lloop: + xor ROUND, ROUND + + # save old digest + vmovdqu a, _digest(%rsp) + vmovdqu b, _digest+1*SZ8(%rsp) + vmovdqu c, _digest+2*SZ8(%rsp) + vmovdqu d, _digest+3*SZ8(%rsp) + vmovdqu e, _digest+4*SZ8(%rsp) + vmovdqu f, _digest+5*SZ8(%rsp) + vmovdqu g, _digest+6*SZ8(%rsp) + vmovdqu h, _digest+7*SZ8(%rsp) + i = 0 +.rep 2 + VMOVPS i*32(inp0, IDX), TT0 + VMOVPS i*32(inp1, IDX), TT1 + VMOVPS i*32(inp2, IDX), TT2 + VMOVPS i*32(inp3, IDX), TT3 + VMOVPS i*32(inp4, IDX), TT4 + VMOVPS i*32(inp5, IDX), TT5 + VMOVPS i*32(inp6, IDX), TT6 + VMOVPS i*32(inp7, IDX), TT7 + vmovdqu g, _ytmp(%rsp) + vmovdqu h, _ytmp+1*SZ8(%rsp) + TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 + vmovdqu _ytmp(%rsp), g + vpshufb TMP1, TT0, TT0 + vpshufb TMP1, TT1, TT1 + vpshufb TMP1, TT2, TT2 + vpshufb TMP1, TT3, TT3 + vpshufb TMP1, TT4, TT4 + vpshufb TMP1, TT5, TT5 + vpshufb TMP1, TT6, TT6 + vpshufb TMP1, TT7, TT7 + vmovdqu _ytmp+1*SZ8(%rsp), h + vmovdqu TT4, _ytmp(%rsp) + vmovdqu TT5, _ytmp+1*SZ8(%rsp) + vmovdqu TT6, _ytmp+2*SZ8(%rsp) + vmovdqu TT7, _ytmp+3*SZ8(%rsp) + ROUND_00_15 TT0,(i*8+0) + vmovdqu _ytmp(%rsp), TT0 + ROUND_00_15 TT1,(i*8+1) + vmovdqu _ytmp+1*SZ8(%rsp), TT1 + ROUND_00_15 TT2,(i*8+2) + vmovdqu _ytmp+2*SZ8(%rsp), TT2 + ROUND_00_15 TT3,(i*8+3) + vmovdqu _ytmp+3*SZ8(%rsp), TT3 + ROUND_00_15 TT0,(i*8+4) + ROUND_00_15 TT1,(i*8+5) + ROUND_00_15 TT2,(i*8+6) + ROUND_00_15 TT3,(i*8+7) + i = (i+1) +.endr + add $64, IDX + i = (i*8) + + jmp Lrounds_16_xx +.align 16 +Lrounds_16_xx: +.rep 16 + ROUND_16_XX T1, i + i = (i+1) +.endr + + cmp $ROUNDS,ROUND + jb Lrounds_16_xx + + # add old digest + vpaddd _digest+0*SZ8(%rsp), a, a + vpaddd _digest+1*SZ8(%rsp), b, b + vpaddd _digest+2*SZ8(%rsp), c, c + vpaddd _digest+3*SZ8(%rsp), d, d + vpaddd _digest+4*SZ8(%rsp), e, e + vpaddd _digest+5*SZ8(%rsp), f, f + vpaddd _digest+6*SZ8(%rsp), g, g + vpaddd _digest+7*SZ8(%rsp), h, h + + sub $1, INP_SIZE # unit is blocks + jne lloop + + # write back to memory (state object) the transposed digest + vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) + + # update input pointers + add IDX, inp0 + mov inp0, _args_data_ptr+0*8(STATE) + add IDX, inp1 + mov inp1, _args_data_ptr+1*8(STATE) + add IDX, inp2 + mov inp2, _args_data_ptr+2*8(STATE) + add IDX, inp3 + mov inp3, _args_data_ptr+3*8(STATE) + add IDX, inp4 + mov inp4, _args_data_ptr+4*8(STATE) + add IDX, inp5 + mov inp5, _args_data_ptr+5*8(STATE) + add IDX, inp6 + mov inp6, _args_data_ptr+6*8(STATE) + add IDX, inp7 + mov inp7, _args_data_ptr+7*8(STATE) + + # Postamble + mov _rsp(%rsp), %rsp + + # restore callee-saved clobbered registers + pop %r15 + pop %r14 + pop %r13 + pop %r12 + + ret +ENDPROC(sha256_x8_avx2) +.data +.align 64 +K256_8: + .octa 0x428a2f98428a2f98428a2f98428a2f98 + .octa 0x428a2f98428a2f98428a2f98428a2f98 + .octa 0x71374491713744917137449171374491 + .octa 0x71374491713744917137449171374491 + .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf + .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf + .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 + .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 + .octa 0x3956c25b3956c25b3956c25b3956c25b + .octa 0x3956c25b3956c25b3956c25b3956c25b + .octa 0x59f111f159f111f159f111f159f111f1 + .octa 0x59f111f159f111f159f111f159f111f1 + .octa 0x923f82a4923f82a4923f82a4923f82a4 + .octa 0x923f82a4923f82a4923f82a4923f82a4 + .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 + .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 + .octa 0xd807aa98d807aa98d807aa98d807aa98 + .octa 0xd807aa98d807aa98d807aa98d807aa98 + .octa 0x12835b0112835b0112835b0112835b01 + .octa 0x12835b0112835b0112835b0112835b01 + .octa 0x243185be243185be243185be243185be + .octa 0x243185be243185be243185be243185be + .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 + .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 + .octa 0x72be5d7472be5d7472be5d7472be5d74 + .octa 0x72be5d7472be5d7472be5d7472be5d74 + .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe + .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe + .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 + .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 + .octa 0xc19bf174c19bf174c19bf174c19bf174 + .octa 0xc19bf174c19bf174c19bf174c19bf174 + .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 + .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 + .octa 0xefbe4786efbe4786efbe4786efbe4786 + .octa 0xefbe4786efbe4786efbe4786efbe4786 + .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 + .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 + .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc + .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc + .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f + .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f + .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa + .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa + .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc + .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc + .octa 0x76f988da76f988da76f988da76f988da + .octa 0x76f988da76f988da76f988da76f988da + .octa 0x983e5152983e5152983e5152983e5152 + .octa 0x983e5152983e5152983e5152983e5152 + .octa 0xa831c66da831c66da831c66da831c66d + .octa 0xa831c66da831c66da831c66da831c66d + .octa 0xb00327c8b00327c8b00327c8b00327c8 + .octa 0xb00327c8b00327c8b00327c8b00327c8 + .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 + .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 + .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 + .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 + .octa 0xd5a79147d5a79147d5a79147d5a79147 + .octa 0xd5a79147d5a79147d5a79147d5a79147 + .octa 0x06ca635106ca635106ca635106ca6351 + .octa 0x06ca635106ca635106ca635106ca6351 + .octa 0x14292967142929671429296714292967 + .octa 0x14292967142929671429296714292967 + .octa 0x27b70a8527b70a8527b70a8527b70a85 + .octa 0x27b70a8527b70a8527b70a8527b70a85 + .octa 0x2e1b21382e1b21382e1b21382e1b2138 + .octa 0x2e1b21382e1b21382e1b21382e1b2138 + .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc + .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc + .octa 0x53380d1353380d1353380d1353380d13 + .octa 0x53380d1353380d1353380d1353380d13 + .octa 0x650a7354650a7354650a7354650a7354 + .octa 0x650a7354650a7354650a7354650a7354 + .octa 0x766a0abb766a0abb766a0abb766a0abb + .octa 0x766a0abb766a0abb766a0abb766a0abb + .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e + .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e + .octa 0x92722c8592722c8592722c8592722c85 + .octa 0x92722c8592722c8592722c8592722c85 + .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 + .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 + .octa 0xa81a664ba81a664ba81a664ba81a664b + .octa 0xa81a664ba81a664ba81a664ba81a664b + .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 + .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 + .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 + .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 + .octa 0xd192e819d192e819d192e819d192e819 + .octa 0xd192e819d192e819d192e819d192e819 + .octa 0xd6990624d6990624d6990624d6990624 + .octa 0xd6990624d6990624d6990624d6990624 + .octa 0xf40e3585f40e3585f40e3585f40e3585 + .octa 0xf40e3585f40e3585f40e3585f40e3585 + .octa 0x106aa070106aa070106aa070106aa070 + .octa 0x106aa070106aa070106aa070106aa070 + .octa 0x19a4c11619a4c11619a4c11619a4c116 + .octa 0x19a4c11619a4c11619a4c11619a4c116 + .octa 0x1e376c081e376c081e376c081e376c08 + .octa 0x1e376c081e376c081e376c081e376c08 + .octa 0x2748774c2748774c2748774c2748774c + .octa 0x2748774c2748774c2748774c2748774c + .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 + .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 + .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 + .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 + .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a + .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a + .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f + .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f + .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 + .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 + .octa 0x748f82ee748f82ee748f82ee748f82ee + .octa 0x748f82ee748f82ee748f82ee748f82ee + .octa 0x78a5636f78a5636f78a5636f78a5636f + .octa 0x78a5636f78a5636f78a5636f78a5636f + .octa 0x84c8781484c8781484c8781484c87814 + .octa 0x84c8781484c8781484c8781484c87814 + .octa 0x8cc702088cc702088cc702088cc70208 + .octa 0x8cc702088cc702088cc702088cc70208 + .octa 0x90befffa90befffa90befffa90befffa + .octa 0x90befffa90befffa90befffa90befffa + .octa 0xa4506ceba4506ceba4506ceba4506ceb + .octa 0xa4506ceba4506ceba4506ceba4506ceb + .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 + .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 + .octa 0xc67178f2c67178f2c67178f2c67178f2 + .octa 0xc67178f2c67178f2c67178f2c67178f2 +PSHUFFLE_BYTE_FLIP_MASK: +.octa 0x0c0d0e0f08090a0b0405060700010203 +.octa 0x0c0d0e0f08090a0b0405060700010203 + +.align 64 +.global K256 +K256: + .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 From 087bcd225c5656a0beac02739471085d000c9680 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:47 -0700 Subject: [PATCH 094/180] crypto: tcrypt - Add speed tests for SHA multibuffer algorithms The existing test suite to calculate the speed of the SHA algorithms assumes serial (single buffer)) computation of data. With the SHA multibuffer algorithms, we work on 8 lanes of data in parallel. Hence, the need to introduce a new test suite to calculate the speed for these algorithms. Signed-off-by: Megha Dey Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 4675459e82da7..6ef78157a0ab0 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -578,6 +578,117 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) return ret; } +char ptext[4096]; +struct scatterlist sg[8][8]; +char result[8][64]; +struct ahash_request *req[8]; +struct tcrypt_result tresult[8]; +char *xbuf[8][XBUFSIZE]; +cycles_t start[8], end[8], mid; + +static void test_mb_ahash_speed(const char *algo, unsigned int sec, + struct hash_speed *speed) +{ + unsigned int i, j, k; + void *hash_buff; + int ret = -ENOMEM; + struct crypto_ahash *tfm; + + tfm = crypto_alloc_ahash(algo, 0, 0); + if (IS_ERR(tfm)) { + pr_err("failed to load transform for %s: %ld\n", + algo, PTR_ERR(tfm)); + return; + } + for (i = 0; i < 8; ++i) { + if (testmgr_alloc_buf(xbuf[i])) + goto out_nobuf; + + init_completion(&tresult[i].completion); + + req[i] = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req[i]) { + printk(KERN_ERR "alg: hash: Failed to allocate " + "request for %s\n", algo); + goto out_noreq; + } + ahash_request_set_callback(req[i], CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &tresult[i]); + + hash_buff = xbuf[i][0]; + memcpy(hash_buff, ptext, 4096); + } + + j = 0; + + printk(KERN_INFO "\ntesting speed of %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); + + for (i = 0; speed[i].blen != 0; i++) { + if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { + printk(KERN_ERR + "template (%u) too big for tvmem (%lu)\n", + speed[i].blen, TVMEMSIZE * PAGE_SIZE); + goto out; + } + + if (speed[i].klen) + crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen); + + for (k = 0; k < 8; ++k) { + sg_init_one(&sg[k][0], (void *) xbuf[k][0], + speed[i].blen); + ahash_request_set_crypt(req[k], sg[k], + result[k], speed[i].blen); + } + + printk(KERN_INFO "test%3u " + "(%5u byte blocks,%5u bytes per update,%4u updates): ", + i, speed[i].blen, speed[i].plen, + speed[i].blen / speed[i].plen); + + for (k = 0; k < 8; ++k) { + start[k] = get_cycles(); + ret = crypto_ahash_digest(req[k]); + if (ret == -EBUSY || ret == -EINPROGRESS) + continue; + if (ret) { + printk(KERN_ERR + "alg (%s) something wrong, ret = %d ...\n", + algo, ret); + goto out; + } + } + mid = get_cycles(); + + for (k = 0; k < 8; ++k) { + struct tcrypt_result *tr = &tresult[k]; + + ret = wait_for_completion_interruptible + (&tr->completion); + if (ret) + printk(KERN_ERR + "alg(%s): hash: digest failed\n", algo); + end[k] = get_cycles(); + } + + printk("\nBlock: %lld cycles (%lld cycles/byte), %d bytes\n", + (s64) (end[7]-start[0])/1, + (s64) (end[7]-start[0])/(8*speed[i].blen), + 8*speed[i].blen); + } + ret = 0; + +out: + for (k = 0; k < 8; ++k) + ahash_request_free(req[k]); +out_noreq: + for (k = 0; k < 8; ++k) + testmgr_free_buf(xbuf[k]); +out_nobuf: + return; +} + static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, char *out, int secs) { @@ -1820,6 +1931,13 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_ahash_speed("sha3-512", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; + case 422: + test_mb_ahash_speed("sha1", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 423: + test_mb_ahash_speed("sha256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; case 499: break; From 4c79f6f81a23052e5d1f13a3e6e9aaf915621430 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Thu, 23 Jun 2016 18:40:48 -0700 Subject: [PATCH 095/180] crypto: sha1-mb - rename sha-mb to sha1-mb Until now, there was only support for the SHA1 multibuffer algorithm. Hence, there was just one sha-mb folder. Now, with the introduction of the SHA256 multi-buffer algorithm , it is logical to name the existing folder as sha1-mb. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 +- arch/x86/crypto/{sha-mb => sha1-mb}/Makefile | 0 arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb.c | 2 +- arch/x86/crypto/{sha-mb/sha_mb_ctx.h => sha1-mb/sha1_mb_ctx.h} | 2 +- arch/x86/crypto/{sha-mb/sha_mb_mgr.h => sha1-mb/sha1_mb_mgr.h} | 0 arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_datastruct.S | 0 arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_flush_avx2.S | 0 arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_init_avx2.c | 2 +- arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_submit_avx2.S | 0 arch/x86/crypto/{sha-mb => sha1-mb}/sha1_x8_avx2.S | 0 10 files changed, 4 insertions(+), 4 deletions(-) rename arch/x86/crypto/{sha-mb => sha1-mb}/Makefile (100%) rename arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb.c (99%) rename arch/x86/crypto/{sha-mb/sha_mb_ctx.h => sha1-mb/sha1_mb_ctx.h} (99%) rename arch/x86/crypto/{sha-mb/sha_mb_mgr.h => sha1-mb/sha1_mb_mgr.h} (100%) rename arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_datastruct.S (100%) rename arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_flush_avx2.S (100%) rename arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_init_avx2.c (99%) rename arch/x86/crypto/{sha-mb => sha1-mb}/sha1_mb_mgr_submit_avx2.S (100%) rename arch/x86/crypto/{sha-mb => sha1-mb}/sha1_x8_avx2.S (100%) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index aafff22b0ea61..52c8685239214 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -49,7 +49,7 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ + obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ endif diff --git a/arch/x86/crypto/sha-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile similarity index 100% rename from arch/x86/crypto/sha-mb/Makefile rename to arch/x86/crypto/sha1-mb/Makefile diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c similarity index 99% rename from arch/x86/crypto/sha-mb/sha1_mb.c rename to arch/x86/crypto/sha1-mb/sha1_mb.c index 669cc37268e1c..561b2867b8788 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -67,7 +67,7 @@ #include #include #include -#include "sha_mb_ctx.h" +#include "sha1_mb_ctx.h" #define FLUSH_INTERVAL 1000 /* in usec */ diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h similarity index 99% rename from arch/x86/crypto/sha-mb/sha_mb_ctx.h rename to arch/x86/crypto/sha1-mb/sha1_mb_ctx.h index e36069d0c1bdb..98a35bcc6f4ad 100644 --- a/arch/x86/crypto/sha-mb/sha_mb_ctx.h +++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h @@ -54,7 +54,7 @@ #ifndef _SHA_MB_CTX_INTERNAL_H #define _SHA_MB_CTX_INTERNAL_H -#include "sha_mb_mgr.h" +#include "sha1_mb_mgr.h" #define HASH_UPDATE 0x00 #define HASH_FIRST 0x01 diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h similarity index 100% rename from arch/x86/crypto/sha-mb/sha_mb_mgr.h rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr.h diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c similarity index 99% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c index 822acb5b464c2..d2add0d35f43b 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c @@ -51,7 +51,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "sha_mb_mgr.h" +#include "sha1_mb_mgr.h" void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) { diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_x8_avx2.S rename to arch/x86/crypto/sha1-mb/sha1_x8_avx2.S From 90ab5a814a74dfc4981806ed68fad64ebc98e2d2 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Fri, 24 Jun 2016 07:01:33 -0700 Subject: [PATCH 096/180] MAINTAINERS: update maintainer for qat Add Giovanni and Salvatore who will take over the qat maintenance. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f4a9c13208edd..5d813a37f6d79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9292,7 +9292,8 @@ L: rtc-linux@googlegroups.com S: Maintained QAT DRIVER -M: Tadeusz Struk +M: Giovanni Cabiddu +M: Salvatore Benedetto L: qat-linux@intel.com S: Supported F: drivers/crypto/qat/ From 61dc0a446e5d08f2de8a24b45f69a1e302bb1b1b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 24 Jun 2016 11:50:39 -0500 Subject: [PATCH 097/180] hwrng: omap - Fix assumption that runtime_get_sync will always succeed pm_runtime_get_sync does return a error value that must be checked for error conditions, else, due to various reasons, the device maynot be enabled and the system will crash due to lack of clock to the hardware module. Before: 12.562784] [00000000] *pgd=fe193835 12.562792] Internal error: : 1406 [#1] SMP ARM [...] 12.562864] CPU: 1 PID: 241 Comm: modprobe Not tainted 4.7.0-rc4-next-20160624 #2 12.562867] Hardware name: Generic DRA74X (Flattened Device Tree) 12.562872] task: ed51f140 ti: ed44c000 task.ti: ed44c000 12.562886] PC is at omap4_rng_init+0x20/0x84 [omap_rng] 12.562899] LR is at set_current_rng+0xc0/0x154 [rng_core] [...] After the proper checks: [ 94.366705] omap_rng 48090000.rng: _od_fail_runtime_resume: FIXME: missing hwmod/omap_dev info [ 94.375767] omap_rng 48090000.rng: Failed to runtime_get device -19 [ 94.382351] omap_rng 48090000.rng: initialization failed. Fixes: 665d92fa85b5 ("hwrng: OMAP: convert to use runtime PM") Cc: Paul Walmsley Signed-off-by: Nishanth Menon Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index 8a1432e8bb800..01d4be2c354b0 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -384,7 +384,12 @@ static int omap_rng_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret) { + dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret); + pm_runtime_put_noidle(&pdev->dev); + goto err_ioremap; + } ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) : get_omap_rng_device_details(priv); @@ -435,8 +440,15 @@ static int __maybe_unused omap_rng_suspend(struct device *dev) static int __maybe_unused omap_rng_resume(struct device *dev) { struct omap_rng_dev *priv = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_get_sync(dev); + if (ret) { + dev_err(dev, "Failed to runtime_get device: %d\n", ret); + pm_runtime_put_noidle(dev); + return ret; + } - pm_runtime_get_sync(dev); priv->pdata->init(priv); return 0; From 9ac1c3200c79e7b83b881232f40bc9e34b040516 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Jun 2016 11:17:40 +0200 Subject: [PATCH 098/180] crypto: ux500 - do not build with -O0 The ARM allmodconfig build currently warngs because of the ux500 crypto driver not working well with the jump label implementation that we started using for dynamic debug, which breaks building with 'gcc -O0': In file included from /git/arm-soc/include/linux/jump_label.h:105:0, from /git/arm-soc/include/linux/dynamic_debug.h:5, from /git/arm-soc/include/linux/printk.h:289, from /git/arm-soc/include/linux/kernel.h:13, from /git/arm-soc/include/linux/clk.h:16, from /git/arm-soc/drivers/crypto/ux500/hash/hash_core.c:16: /git/arm-soc/arch/arm/include/asm/jump_label.h: In function 'hash_set_dma_transfer': /git/arm-soc/arch/arm/include/asm/jump_label.h:13:7: error: asm operand 0 probably doesn't match constraints [-Werror] asm_volatile_goto("1:\n\t" Turning off compiler optimizations has never really been supported here, and it's only used when debugging the driver. I have not found a good reason for doing this here, other than a misguided attempt to produce more readable assembly output. Also, the driver is only used in obsolete hardware that almost certainly nobody will spend time debugging any more. This just removes the -O0 flag from the compiler options. Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/ux500/cryp/Makefile | 6 +++--- drivers/crypto/ux500/hash/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile index e5d362a6f680e..b497ae3dde076 100644 --- a/drivers/crypto/ux500/cryp/Makefile +++ b/drivers/crypto/ux500/cryp/Makefile @@ -4,9 +4,9 @@ # * License terms: GNU General Public License (GPL) version 2 */ ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_cryp_core.o := -DDEBUG -O0 -CFLAGS_cryp.o := -DDEBUG -O0 -CFLAGS_cryp_irq.o := -DDEBUG -O0 +CFLAGS_cryp_core.o := -DDEBUG +CFLAGS_cryp.o := -DDEBUG +CFLAGS_cryp_irq.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o diff --git a/drivers/crypto/ux500/hash/Makefile b/drivers/crypto/ux500/hash/Makefile index b2f90d9bac722..784d9c0a88531 100644 --- a/drivers/crypto/ux500/hash/Makefile +++ b/drivers/crypto/ux500/hash/Makefile @@ -4,7 +4,7 @@ # License terms: GNU General Public License (GPL) version 2 # ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_hash_core.o := -DDEBUG -O0 +CFLAGS_hash_core.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o From 8c603ff28659e65fdae960cd3f952ec168fc773a Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 27 Jun 2016 10:20:04 -0700 Subject: [PATCH 099/180] crypto: sha512-mb - SHA512 multibuffer job manager and glue code This patch introduces the multi-buffer job manager which is responsible for submitting scatter-gather buffers from several SHA512 jobs to the multi-buffer algorithm. It also contains the flush routine that's called by the crypto daemon to complete the job when no new jobs arrive before the deadline of maximum latency of a SHA512 crypto job. The SHA512 multi-buffer crypto algorithm is defined and initialized in this patch. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 1 + arch/x86/crypto/sha512-mb/Makefile | 11 + arch/x86/crypto/sha512-mb/sha512_mb.c | 1043 +++++++++++++++++++++++++ 3 files changed, 1055 insertions(+) create mode 100644 arch/x86/crypto/sha512-mb/Makefile create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 52c8685239214..34b3fa2889d1f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -51,6 +51,7 @@ ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ + obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile new file mode 100644 index 0000000000000..0a57e21039809 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/Makefile @@ -0,0 +1,11 @@ +# +# Arch-specific CryptoAPI modules. +# + +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o + sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ + sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o +endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c new file mode 100644 index 0000000000000..676f0f272f2b5 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -0,0 +1,1043 @@ +/* + * Multi buffer SHA512 algorithm Glue Code + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sha512_mb_ctx.h" + +#define FLUSH_INTERVAL 1000 /* in usec */ + +static struct mcryptd_alg_state sha512_mb_alg_state; + +struct sha512_mb_ctx { + struct mcryptd_ahash *mcryptd_tfm; +}; + +static inline struct mcryptd_hash_request_ctx + *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx) +{ + struct ahash_request *areq; + + areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); + return container_of(areq, struct mcryptd_hash_request_ctx, areq); +} + +static inline struct ahash_request + *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +{ + return container_of((void *) ctx, struct ahash_request, __ctx); +} + +static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, + struct ahash_request *areq) +{ + rctx->flag = HASH_UPDATE; +} + +static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state); +static asmlinkage struct job_sha512* (*sha512_job_mgr_submit) + (struct sha512_mb_mgr *state, + struct job_sha512 *job); +static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) + (struct sha512_mb_mgr *state); +static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) + (struct sha512_mb_mgr *state); + +inline void sha512_init_digest(uint64_t *digest) +{ + static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = { + SHA512_H0, SHA512_H1, SHA512_H2, + SHA512_H3, SHA512_H4, SHA512_H5, + SHA512_H6, SHA512_H7 }; + memcpy(digest, initial_digest, sizeof(initial_digest)); +} + +inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], + uint32_t total_len) +{ + uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); + + memset(&padblock[i], 0, SHA512_BLOCK_SIZE); + padblock[i] = 0x80; + + i += ((SHA512_BLOCK_SIZE - 1) & + (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1))) + + 1 + SHA512_PADLENGTHFIELD_SIZE; + +#if SHA512_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) &padblock[i - 16]) = 0; +#endif + + *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); + + /* Number of extra blocks to hash */ + return i >> SHA512_LOG2_BLOCK_SIZE; +} + +static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit + (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx) +{ + while (ctx) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { + /* Clear PROCESSING bit */ + ctx->status = HASH_CTX_STS_COMPLETE; + return ctx; + } + + /* + * If the extra blocks are empty, begin hashing what remains + * in the user's buffer. + */ + if (ctx->partial_block_buffer_length == 0 && + ctx->incoming_buffer_length) { + + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + uint32_t copy_len; + + /* + * Only entire blocks can be hashed. + * Copy remainder to extra blocks buffer. + */ + copy_len = len & (SHA512_BLOCK_SIZE-1); + + if (copy_len) { + len -= copy_len; + memcpy(ctx->partial_block_buffer, + ((const char *) buffer + len), + copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + /* len should be a multiple of the block size now */ + assert((len % SHA512_BLOCK_SIZE) == 0); + + /* Set len to the number of blocks to be hashed */ + len >>= SHA512_LOG2_BLOCK_SIZE; + + if (len) { + + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_submit(&mgr->mgr, + &ctx->job); + continue; + } + } + + /* + * If the extra blocks are not empty, then we are + * either on the last block(s) or we need more + * user input before continuing. + */ + if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = + sha512_pad(buf, ctx->total_length); + + ctx->status = (HASH_CTX_STS_PROCESSING | + HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static struct sha512_hash_ctx + *sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr) +{ + /* + * If get_comp_job returns NULL, there are no jobs complete. + * If get_comp_job returns a job, verify that it is safe to return to + * the user. + * If it is not ready, resubmit the job to finish processing. + * If sha512_ctx_mgr_resubmit returned a job, it is ready to be + * returned. + * Otherwise, all jobs currently being managed by the hash_ctx_mgr + * still need processing. + */ + struct sha512_hash_ctx *ctx; + + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_get_comp_job(&mgr->mgr); + return sha512_ctx_mgr_resubmit(mgr, ctx); +} + +static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) +{ + sha512_job_mgr_init(&mgr->mgr); +} + +static struct sha512_hash_ctx + *sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr, + struct sha512_hash_ctx *ctx, + const void *buffer, + uint32_t len, + int flags) +{ + if (flags & (~HASH_ENTIRE)) { + /* User should not pass anything other than FIRST, UPDATE, or + * LAST + */ + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + /* Cannot submit to a currently processing job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + /* Cannot update a finished job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + + if (flags & HASH_FIRST) { + /* Init digest */ + sha512_init_digest(ctx->job.result_digest); + + /* Reset byte counter */ + ctx->total_length = 0; + + /* Clear extra blocks */ + ctx->partial_block_buffer_length = 0; + } + + /* If we made it here, there were no errors during this call to + * submit + */ + ctx->error = HASH_CTX_ERROR_NONE; + + /* Store buffer ptr info from user */ + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + /* Store the user's request flags and mark this ctx as currently being + * processed. + */ + ctx->status = (flags & HASH_LAST) ? + (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + /* Advance byte counter */ + ctx->total_length += len; + + /* + * If there is anything currently buffered in the extra blocks, + * append to it until it contains a whole block. + * Or if the user's buffer contains less than a whole block, + * append as much as possible to the extra block. + */ + if ((ctx->partial_block_buffer_length) | (len < SHA512_BLOCK_SIZE)) { + /* Compute how many bytes to copy from user buffer into extra + * block + */ + uint32_t copy_len = SHA512_BLOCK_SIZE - + ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + /* Copy and update relevant pointers and counters */ + memcpy + (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], + buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *) + ((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + + /* The extra block should never contain more than 1 block + * here + */ + assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); + + /* If the extra block buffer contains exactly 1 block, it can + * be hashed. + */ + if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_submit(&mgr->mgr, &ctx->job); + } + } + + return sha512_ctx_mgr_resubmit(mgr, ctx); +} + +static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr) +{ + struct sha512_hash_ctx *ctx; + + while (1) { + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_flush(&mgr->mgr); + + /* If flush returned 0, there are no more jobs in flight. */ + if (!ctx) + return NULL; + + /* + * If flush returned a job, resubmit the job to finish + * processing. + */ + ctx = sha512_ctx_mgr_resubmit(mgr, ctx); + + /* + * If sha512_ctx_mgr_resubmit returned a job, it is ready to + * be returned. Otherwise, all jobs currently being managed by + * the sha512_ctx_mgr still need processing. Loop. + */ + if (ctx) + return ctx; + } +} + +static int sha512_mb_init(struct ahash_request *areq) +{ + struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); + + hash_ctx_init(sctx); + sctx->job.result_digest[0] = SHA512_H0; + sctx->job.result_digest[1] = SHA512_H1; + sctx->job.result_digest[2] = SHA512_H2; + sctx->job.result_digest[3] = SHA512_H3; + sctx->job.result_digest[4] = SHA512_H4; + sctx->job.result_digest[5] = SHA512_H5; + sctx->job.result_digest[6] = SHA512_H6; + sctx->job.result_digest[7] = SHA512_H7; + sctx->total_length = 0; + sctx->partial_block_buffer_length = 0; + sctx->status = HASH_CTX_STS_IDLE; + + return 0; +} + +static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) +{ + int i; + struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); + __be64 *dst = (__be64 *) rctx->out; + + for (i = 0; i < 8; ++i) + dst[i] = cpu_to_be64(sctx->job.result_digest[i]); + + return 0; +} + +static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, + struct mcryptd_alg_cstate *cstate, bool flush) +{ + int flag = HASH_UPDATE; + int nbytes, err = 0; + struct mcryptd_hash_request_ctx *rctx = *ret_rctx; + struct sha512_hash_ctx *sha_ctx; + + /* more work ? */ + while (!(rctx->flag & HASH_DONE)) { + nbytes = crypto_ahash_walk_done(&rctx->walk, 0); + if (nbytes < 0) { + err = nbytes; + goto out; + } + /* check if the walk is done */ + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + if (rctx->flag & HASH_FINAL) + flag |= HASH_LAST; + + } + sha_ctx = (struct sha512_hash_ctx *) + ahash_request_ctx(&rctx->areq); + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, + rctx->walk.data, nbytes, flag); + if (!sha_ctx) { + if (flush) + sha_ctx = sha512_ctx_mgr_flush(cstate->mgr); + } + kernel_fpu_end(); + if (sha_ctx) + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + else { + rctx = NULL; + goto out; + } + } + + /* copy the results */ + if (rctx->flag & HASH_FINAL) + sha512_mb_set_results(rctx); + +out: + *ret_rctx = rctx; + return err; +} + +static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate, + int err) +{ + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha512_hash_ctx *sha_ctx; + struct mcryptd_hash_request_ctx *req_ctx; + int ret; + + /* remove from work list */ + spin_lock(&cstate->work_lock); + list_del(&rctx->waiter); + spin_unlock(&cstate->work_lock); + + if (irqs_disabled()) + rctx->complete(&req->base, err); + else { + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + } + + /* check to see if there are other jobs that are done */ + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + while (sha_ctx) { + req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&req_ctx, cstate, false); + if (req_ctx) { + spin_lock(&cstate->work_lock); + list_del(&req_ctx->waiter); + spin_unlock(&cstate->work_lock); + + req = cast_mcryptd_ctx_to_req(req_ctx); + if (irqs_disabled()) + rctx->complete(&req->base, ret); + else { + local_bh_disable(); + rctx->complete(&req->base, ret); + local_bh_enable(); + } + } + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + } + + return 0; +} + +static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate) +{ + unsigned long next_flush; + unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + + /* initialize tag */ + rctx->tag.arrival = jiffies; /* tag the arrival time */ + rctx->tag.seq_num = cstate->next_seq_num++; + next_flush = rctx->tag.arrival + delay; + rctx->tag.expire = next_flush; + + spin_lock(&cstate->work_lock); + list_add_tail(&rctx->waiter, &cstate->work_list); + spin_unlock(&cstate->work_lock); + + mcryptd_arm_flusher(cstate, delay); +} + +static int sha512_mb_update(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha512_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha512_hash_ctx *sha_ctx; + int ret = 0, nbytes; + + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) + rctx->flag |= HASH_DONE; + + /* submit */ + sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); + sha512_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, HASH_UPDATE); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha512_mb_finup(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha512_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha512_hash_ctx *sha_ctx; + int ret = 0, flag = HASH_UPDATE, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + flag = HASH_LAST; + } + + /* submit */ + rctx->flag |= HASH_FINAL; + sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); + sha512_mb_add_list(rctx, cstate); + + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, flag); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha512_mb_final(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha512_mb_alg_state.alg_cstate); + + struct sha512_hash_ctx *sha_ctx; + int ret = 0; + u8 data; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + rctx->flag |= HASH_DONE | HASH_FINAL; + + sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); + /* flag HASH_FINAL and 0 data size */ + sha512_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, + HASH_LAST); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha512_mb_export(struct ahash_request *areq, void *out) +{ + struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha512_mb_import(struct ahash_request *areq, const void *in) +{ + struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; + + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha512_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + sizeof(struct sha512_hash_ctx)); + + return 0; +} + +static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha512_mb_areq_alg = { + .init = sha512_mb_init, + .update = sha512_mb_update, + .final = sha512_mb_final, + .finup = sha512_mb_finup, + .export = sha512_mb_export, + .import = sha512_mb_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct sha512_hash_ctx), + .base = { + .cra_name = "__sha512-mb", + .cra_driver_name = "__intel_sha512-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread + * sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha512_mb_areq_alg.halg.base.cra_list), + .cra_init = sha512_mb_areq_init_tfm, + .cra_exit = sha512_mb_areq_exit_tfm, + .cra_ctxsize = sizeof(struct sha512_hash_ctx), + } + } +}; + +static int sha512_mb_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_init(mcryptd_req); +} + +static int sha512_mb_async_update(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_update(mcryptd_req); +} + +static int sha512_mb_async_finup(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_finup(mcryptd_req); +} + +static int sha512_mb_async_final(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_final(mcryptd_req); +} + +static int sha512_mb_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_digest(mcryptd_req); +} + +static int sha512_mb_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_export(mcryptd_req, out); +} + +static int sha512_mb_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); + struct mcryptd_hash_request_ctx *rctx; + struct ahash_request *areq; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + rctx = ahash_request_ctx(mcryptd_req); + + areq = &rctx->areq; + + ahash_request_set_tfm(areq, child); + ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req); + + return crypto_ahash_import(mcryptd_req, in); +} + +static struct ahash_alg sha512_mb_async_alg = { + .init = sha512_mb_async_init, + .update = sha512_mb_async_update, + .final = sha512_mb_async_final, + .finup = sha512_mb_async_finup, + .digest = sha512_mb_async_digest, + .export = sha512_mb_async_export, + .import = sha512_mb_async_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct sha512_hash_ctx), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512_mb", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha512_mb_async_alg.halg.base.cra_list), + .cra_init = sha512_mb_async_init_tfm, + .cra_exit = sha512_mb_async_exit_tfm, + .cra_ctxsize = sizeof(struct sha512_mb_ctx), + .cra_alignmask = 0, + }, + }, +}; + +static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) +{ + struct mcryptd_hash_request_ctx *rctx; + unsigned long cur_time; + unsigned long next_flush = 0; + struct sha512_hash_ctx *sha_ctx; + + + cur_time = jiffies; + + while (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + if time_before(cur_time, rctx->tag.expire) + break; + kernel_fpu_begin(); + sha_ctx = (struct sha512_hash_ctx *) + sha512_ctx_mgr_flush(cstate->mgr); + kernel_fpu_end(); + if (!sha_ctx) { + pr_err("sha512_mb error: nothing got flushed for" + " non-empty list\n"); + break; + } + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + sha_finish_walk(&rctx, cstate, true); + sha_complete_job(rctx, cstate, 0); + } + + if (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + /* get the hash context and then flush time */ + next_flush = rctx->tag.expire; + mcryptd_arm_flusher(cstate, get_delay(next_flush)); + } + return next_flush; +} + +static int __init sha512_mb_mod_init(void) +{ + + int cpu; + int err; + struct mcryptd_alg_cstate *cpu_state; + + /* check for dependent cpu features */ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_BMI2)) + return -ENODEV; + + /* initialize multibuffer structures */ + sha512_mb_alg_state.alg_cstate = + alloc_percpu(struct mcryptd_alg_cstate); + + sha512_job_mgr_init = sha512_mb_mgr_init_avx2; + sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; + sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; + sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; + + if (!sha512_mb_alg_state.alg_cstate) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); + cpu_state->next_flush = 0; + cpu_state->next_seq_num = 0; + cpu_state->flusher_engaged = false; + INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); + cpu_state->cpu = cpu; + cpu_state->alg_state = &sha512_mb_alg_state; + cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), + GFP_KERNEL); + if (!cpu_state->mgr) + goto err2; + sha512_ctx_mgr_init(cpu_state->mgr); + INIT_LIST_HEAD(&cpu_state->work_list); + spin_lock_init(&cpu_state->work_lock); + } + sha512_mb_alg_state.flusher = &sha512_mb_flusher; + + err = crypto_register_ahash(&sha512_mb_areq_alg); + if (err) + goto err2; + err = crypto_register_ahash(&sha512_mb_async_alg); + if (err) + goto err1; + + + return 0; +err1: + crypto_unregister_ahash(&sha512_mb_areq_alg); +err2: + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha512_mb_alg_state.alg_cstate); + return -ENODEV; +} + +static void __exit sha512_mb_mod_fini(void) +{ + int cpu; + struct mcryptd_alg_cstate *cpu_state; + + crypto_unregister_ahash(&sha512_mb_async_alg); + crypto_unregister_ahash(&sha512_mb_areq_alg); + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha512_mb_alg_state.alg_cstate); +} + +module_init(sha512_mb_mod_init); +module_exit(sha512_mb_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); + +MODULE_ALIAS("sha512"); From 026bb8aaf5162b881fdd56f12fa8a6f5a052e097 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 27 Jun 2016 10:20:05 -0700 Subject: [PATCH 100/180] crypto: sha512-mb - Enable SHA512 multibuffer support Add the config CRYPTO_SHA512_MB which will enable the computation using the SHA512 multi-buffer algorithm. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/Kconfig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index d8cc0f0852786..62fcbb9237533 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -720,6 +720,22 @@ config CRYPTO_SHA256_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. +config CRYPTO_SHA512_MB + tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_SHA512 + select CRYPTO_HASH + select CRYPTO_MCRYPTD + help + SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using multi-buffer technique. This algorithm computes on + multiple data lanes concurrently with SIMD instructions for + better throughput. It should not be enabled by default but + used when there is significant amount of work to keep the keep + the data lanes filled to get performance benefit. If the data + lanes remain unfilled, a flush operation will be initiated to + process the crypto jobs, adding a slight latency. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH From 45691e2d9b18a0a1675b2c7504847f7c228f7657 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 27 Jun 2016 10:20:06 -0700 Subject: [PATCH 101/180] crypto: sha512-mb - submit/flush routines for AVX2 This patch introduces the routines used to submit and flush buffers belonging to SHA512 crypto jobs to the SHA512 multibuffer algorithm. It is implemented mostly in assembly optimized with AVX2 instructions. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- .../sha512-mb/sha512_mb_mgr_flush_avx2.S | 291 ++++++++++++++++++ .../sha512-mb/sha512_mb_mgr_init_avx2.c | 67 ++++ .../sha512-mb/sha512_mb_mgr_submit_avx2.S | 222 +++++++++++++ 3 files changed, 580 insertions(+) create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S new file mode 100644 index 0000000000000..3ddba19a0db61 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S @@ -0,0 +1,291 @@ +/* + * Flush routine for SHA512 multibuffer + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sha512_mb_mgr_datastruct.S" + +.extern sha512_x4_avx2 + +# LINUX register definitions +#define arg1 %rdi +#define arg2 %rsi + +# idx needs to be other than arg1, arg2, rbx, r12 +#define idx %rdx + +# Common definitions +#define state arg1 +#define job arg2 +#define len2 arg2 + +#define unused_lanes %rbx +#define lane_data %rbx +#define tmp2 %rbx + +#define job_rax %rax +#define tmp1 %rax +#define size_offset %rax +#define tmp %rax +#define start_offset %rax + +#define tmp3 arg1 + +#define extra_blocks arg2 +#define p arg2 + +#define tmp4 %r8 +#define lens0 %r8 + +#define lens1 %r9 +#define lens2 %r10 +#define lens3 %r11 + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JNE_SKIP i +jne skip_\i +.endm + +.altmacro +.macro SET_OFFSET _offset +offset = \_offset +.endm +.noaltmacro + +# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state) +# arg 1 : rcx : state +ENTRY(sha512_mb_mgr_flush_avx2) + FRAME_BEGIN + push %rbx + + # If bit (32+3) is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $32+7, unused_lanes + jc return_null + + # find a lane with a non-null job + xor idx, idx + offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne one(%rip), idx + offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne two(%rip), idx + offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne three(%rip), idx + + # copy idx to empty lanes +copy_lane_data: + offset = (_args + _data_ptr) + mov offset(state,idx,8), tmp + + I = 0 +.rep 4 + offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) +.altmacro + JNE_SKIP %I + offset = (_args + _data_ptr + 8*I) + mov tmp, offset(state) + offset = (_lens + 8*I +4) + movl $0xFFFFFFFF, offset(state) +LABEL skip_ %I + I = (I+1) +.noaltmacro +.endr + + # Find min length + mov _lens + 0*8(state),lens0 + mov lens0,idx + mov _lens + 1*8(state),lens1 + cmp idx,lens1 + cmovb lens1,idx + mov _lens + 2*8(state),lens2 + cmp idx,lens2 + cmovb lens2,idx + mov _lens + 3*8(state),lens3 + cmp idx,lens3 + cmovb lens3,idx + mov idx,len2 + and $0xF,idx + and $~0xFF,len2 + jz len_is_0 + + sub len2, lens0 + sub len2, lens1 + sub len2, lens2 + sub len2, lens3 + shr $32,len2 + mov lens0, _lens + 0*8(state) + mov lens1, _lens + 1*8(state) + mov lens2, _lens + 2*8(state) + mov lens3, _lens + 3*8(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha512_x4_avx2 + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $8, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens+4(state, idx, 8) + + vmovq _args_digest+0*32(state, idx, 8), %xmm0 + vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 + vmovq _args_digest+2*32(state, idx, 8), %xmm1 + vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 + vmovq _args_digest+4*32(state, idx, 8), %xmm2 + vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 + vmovq _args_digest+6*32(state, idx, 8), %xmm3 + vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 + + vmovdqu %xmm0, _result_digest(job_rax) + vmovdqu %xmm1, _result_digest+1*16(job_rax) + vmovdqu %xmm2, _result_digest+2*16(job_rax) + vmovdqu %xmm3, _result_digest+3*16(job_rax) + +return: + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha512_mb_mgr_flush_avx2) +.align 16 + +ENTRY(sha512_mb_mgr_get_comp_job_avx2) + push %rbx + + mov _unused_lanes(state), unused_lanes + bt $(32+7), unused_lanes + jc .return_null + + # Find min length + mov _lens(state),lens0 + mov lens0,idx + mov _lens+1*8(state),lens1 + cmp idx,lens1 + cmovb lens1,idx + mov _lens+2*8(state),lens2 + cmp idx,lens2 + cmovb lens2,idx + mov _lens+3*8(state),lens3 + cmp idx,lens3 + cmovb lens3,idx + test $~0xF,idx + jnz .return_null + and $0xF,idx + + #process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $8, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens+4(state, idx, 8) + + vmovq _args_digest(state, idx, 8), %xmm0 + vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 + vmovq _args_digest+2*32(state, idx, 8), %xmm1 + vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 + vmovq _args_digest+4*32(state, idx, 8), %xmm2 + vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 + vmovq _args_digest+6*32(state, idx, 8), %xmm3 + vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 + + vmovdqu %xmm0, _result_digest+0*16(job_rax) + vmovdqu %xmm1, _result_digest+1*16(job_rax) + vmovdqu %xmm2, _result_digest+2*16(job_rax) + vmovdqu %xmm3, _result_digest+3*16(job_rax) + + pop %rbx + + ret + +.return_null: + xor job_rax, job_rax + pop %rbx + ret +ENDPROC(sha512_mb_mgr_get_comp_job_avx2) +.data + +.align 16 +one: +.quad 1 +two: +.quad 2 +three: +.quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c new file mode 100644 index 0000000000000..36870b26067a7 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -0,0 +1,67 @@ +/* + * Initialization code for multi buffer SHA256 algorithm for AVX2 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sha512_mb_mgr.h" + +void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) +{ + unsigned int j; + + state->lens[0] = 0; + state->lens[1] = 1; + state->lens[2] = 2; + state->lens[3] = 3; + state->unused_lanes = 0xFF03020100; + for (j = 0; j < 4; j++) + state->ldata[j].job_in_lane = NULL; +} diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S new file mode 100644 index 0000000000000..815f07bdd1f8f --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S @@ -0,0 +1,222 @@ +/* + * Buffer submit code for multi buffer SHA512 algorithm + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sha512_mb_mgr_datastruct.S" + +.extern sha512_x4_avx2 + +#define arg1 %rdi +#define arg2 %rsi + +#define idx %rdx +#define last_len %rdx + +#define size_offset %rcx +#define tmp2 %rcx + +# Common definitions +#define state arg1 +#define job arg2 +#define len2 arg2 +#define p2 arg2 + +#define p %r11 +#define start_offset %r11 + +#define unused_lanes %rbx + +#define job_rax %rax +#define len %rax + +#define lane %r12 +#define tmp3 %r12 +#define lens3 %r12 + +#define extra_blocks %r8 +#define lens0 %r8 + +#define tmp %r9 +#define lens1 %r9 + +#define lane_data %r10 +#define lens2 %r10 + +#define DWORD_len %eax + +# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) +# arg 1 : rcx : state +# arg 2 : rdx : job +ENTRY(sha512_mb_mgr_submit_avx2) + FRAME_BEGIN + push %rbx + push %r12 + + mov _unused_lanes(state), unused_lanes + movzb %bl,lane + shr $8, unused_lanes + imul $_LANE_DATA_size, lane,lane_data + movl $STS_BEING_PROCESSED, _status(job) + lea _ldata(state, lane_data), lane_data + mov unused_lanes, _unused_lanes(state) + movl _len(job), DWORD_len + + mov job, _job_in_lane(lane_data) + movl DWORD_len,_lens+4(state , lane, 8) + + # Load digest words from result_digest + vmovdqu _result_digest+0*16(job), %xmm0 + vmovdqu _result_digest+1*16(job), %xmm1 + vmovdqu _result_digest+2*16(job), %xmm2 + vmovdqu _result_digest+3*16(job), %xmm3 + + vmovq %xmm0, _args_digest(state, lane, 8) + vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) + vmovq %xmm1, _args_digest+2*32(state , lane, 8) + vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) + vmovq %xmm2, _args_digest+4*32(state , lane, 8) + vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) + vmovq %xmm3, _args_digest+6*32(state , lane, 8) + vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) + + mov _buffer(job), p + mov p, _args_data_ptr(state, lane, 8) + + cmp $0xFF, unused_lanes + jne return_null + +start_loop: + + # Find min length + mov _lens+0*8(state),lens0 + mov lens0,idx + mov _lens+1*8(state),lens1 + cmp idx,lens1 + cmovb lens1, idx + mov _lens+2*8(state),lens2 + cmp idx,lens2 + cmovb lens2,idx + mov _lens+3*8(state),lens3 + cmp idx,lens3 + cmovb lens3,idx + mov idx,len2 + and $0xF,idx + and $~0xFF,len2 + jz len_is_0 + + sub len2,lens0 + sub len2,lens1 + sub len2,lens2 + sub len2,lens3 + shr $32,len2 + mov lens0, _lens + 0*8(state) + mov lens1, _lens + 1*8(state) + mov lens2, _lens + 2*8(state) + mov lens3, _lens + 3*8(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha512_x4_avx2 + # state and idx are intact + +len_is_0: + + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + mov _unused_lanes(state), unused_lanes + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + shl $8, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF,_lens+4(state,idx,8) + vmovq _args_digest+0*32(state , idx, 8), %xmm0 + vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 + vmovq _args_digest+2*32(state , idx, 8), %xmm1 + vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 + vmovq _args_digest+4*32(state , idx, 8), %xmm2 + vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 + vmovq _args_digest+6*32(state , idx, 8), %xmm3 + vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 + + vmovdqu %xmm0, _result_digest + 0*16(job_rax) + vmovdqu %xmm1, _result_digest + 1*16(job_rax) + vmovdqu %xmm2, _result_digest + 2*16(job_rax) + vmovdqu %xmm3, _result_digest + 3*16(job_rax) + +return: + pop %r12 + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha512_mb_mgr_submit_avx2) +.data + +.align 16 +H0: .int 0x6a09e667 +H1: .int 0xbb67ae85 +H2: .int 0x3c6ef372 +H3: .int 0xa54ff53a +H4: .int 0x510e527f +H5: .int 0x9b05688c +H6: .int 0x1f83d9ab +H7: .int 0x5be0cd19 From 2cdacb68d70d3c2f0fc41108619d51bc256df10a Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 27 Jun 2016 10:20:07 -0700 Subject: [PATCH 102/180] crypto: sha512-mb - Algorithm data structures This patch introduces the data structures and prototypes of functions needed for computing SHA512 hash using multi-buffer. Included are the structures of the multi-buffer SHA512 job, job scheduler in C and x86 assembly. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_mb_ctx.h | 130 ++++++++ arch/x86/crypto/sha512-mb/sha512_mb_mgr.h | 104 +++++++ .../sha512-mb/sha512_mb_mgr_datastruct.S | 281 ++++++++++++++++++ 3 files changed, 515 insertions(+) create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_ctx.h create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr.h create mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h new file mode 100644 index 0000000000000..9d4b2c8208d50 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h @@ -0,0 +1,130 @@ +/* + * Header file for multi buffer SHA512 context + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SHA_MB_CTX_INTERNAL_H +#define _SHA_MB_CTX_INTERNAL_H + +#include "sha512_mb_mgr.h" + +#define HASH_UPDATE 0x00 +#define HASH_FIRST 0x01 +#define HASH_LAST 0x02 +#define HASH_ENTIRE 0x03 +#define HASH_DONE 0x04 +#define HASH_FINAL 0x08 + +#define HASH_CTX_STS_IDLE 0x00 +#define HASH_CTX_STS_PROCESSING 0x01 +#define HASH_CTX_STS_LAST 0x02 +#define HASH_CTX_STS_COMPLETE 0x04 + +enum hash_ctx_error { + HASH_CTX_ERROR_NONE = 0, + HASH_CTX_ERROR_INVALID_FLAGS = -1, + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, +}; + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while (0) + +/* Hash Constants and Typedefs */ +#define SHA512_DIGEST_LENGTH 8 +#define SHA512_LOG2_BLOCK_SIZE 7 + +#define SHA512_PADLENGTHFIELD_SIZE 16 + +#ifdef SHA_MB_DEBUG +#define assert(expr) \ +do { \ + if (unlikely(!(expr))) { \ + printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) +#else +#define assert(expr) do {} while (0) +#endif + +struct sha512_ctx_mgr { + struct sha512_mb_mgr mgr; +}; + +/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ + +struct sha512_hash_ctx { + /* Must be at struct offset 0 */ + struct job_sha512 job; + /* status flag */ + int status; + /* error flag */ + int error; + + uint32_t total_length; + const void *incoming_buffer; + uint32_t incoming_buffer_length; + uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; + uint32_t partial_block_buffer_length; + void *user_data; +}; + +#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h new file mode 100644 index 0000000000000..178f17eef3825 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h @@ -0,0 +1,104 @@ +/* + * Header file for multi buffer SHA512 algorithm manager + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __SHA_MB_MGR_H +#define __SHA_MB_MGR_H + +#include + +#define NUM_SHA512_DIGEST_WORDS 8 + +enum job_sts {STS_UNKNOWN = 0, + STS_BEING_PROCESSED = 1, + STS_COMPLETED = 2, + STS_INTERNAL_ERROR = 3, + STS_ERROR = 4 +}; + +struct job_sha512 { + u8 *buffer; + u64 len; + u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32); + enum job_sts status; + void *user_data; +}; + +struct sha512_args_x4 { + uint64_t digest[8][4]; + uint8_t *data_ptr[4]; +}; + +struct sha512_lane_data { + struct job_sha512 *job_in_lane; +}; + +struct sha512_mb_mgr { + struct sha512_args_x4 args; + + uint64_t lens[4]; + + /* each byte is index (0...7) of unused lanes */ + uint64_t unused_lanes; + /* byte 4 is set to FF as a flag */ + struct sha512_lane_data ldata[4]; +}; + +#define SHA512_MB_MGR_NUM_LANES_AVX2 4 + +void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state); +struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state, + struct job_sha512 *job); +struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state); +struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state); + +#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S new file mode 100644 index 0000000000000..cf2636d4c9ba9 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S @@ -0,0 +1,281 @@ +/* + * Header file for multi buffer SHA256 algorithm data structure + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# Macros for defining data structures + +# Usage example + +#START_FIELDS # JOB_AES +### name size align +#FIELD _plaintext, 8, 8 # pointer to plaintext +#FIELD _ciphertext, 8, 8 # pointer to ciphertext +#FIELD _IV, 16, 8 # IV +#FIELD _keys, 8, 8 # pointer to keys +#FIELD _len, 4, 4 # length in bytes +#FIELD _status, 4, 4 # status enumeration +#FIELD _user_data, 8, 8 # pointer to user data +#UNION _union, size1, align1, \ +# size2, align2, \ +# size3, align3, \ +# ... +#END_FIELDS +#%assign _JOB_AES_size _FIELD_OFFSET +#%assign _JOB_AES_align _STRUCT_ALIGN + +######################################################################### + +# Alternate "struc-like" syntax: +# STRUCT job_aes2 +# RES_Q .plaintext, 1 +# RES_Q .ciphertext, 1 +# RES_DQ .IV, 1 +# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN +# RES_U .union, size1, align1, \ +# size2, align2, \ +# ... +# ENDSTRUCT +# # Following only needed if nesting +# %assign job_aes2_size _FIELD_OFFSET +# %assign job_aes2_align _STRUCT_ALIGN +# +# RES_* macros take a name, a count and an optional alignment. +# The count in in terms of the base size of the macro, and the +# default alignment is the base size. +# The macros are: +# Macro Base size +# RES_B 1 +# RES_W 2 +# RES_D 4 +# RES_Q 8 +# RES_DQ 16 +# RES_Y 32 +# RES_Z 64 +# +# RES_U defines a union. It's arguments are a name and two or more +# pairs of "size, alignment" +# +# The two assigns are only needed if this structure is being nested +# within another. Even if the assigns are not done, one can still use +# STRUCT_NAME_size as the size of the structure. +# +# Note that for nesting, you still need to assign to STRUCT_NAME_size. +# +# The differences between this and using "struc" directly are that each +# type is implicitly aligned to its natural length (although this can be +# over-ridden with an explicit third parameter), and that the structure +# is padded at the end to its overall alignment. +# + +######################################################################### + +#ifndef _DATASTRUCT_ASM_ +#define _DATASTRUCT_ASM_ + +#define PTR_SZ 8 +#define SHA512_DIGEST_WORD_SIZE 8 +#define SHA512_MB_MGR_NUM_LANES_AVX2 4 +#define NUM_SHA512_DIGEST_WORDS 8 +#define SZ4 4*SHA512_DIGEST_WORD_SIZE +#define ROUNDS 80*SZ4 +#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8) + +# START_FIELDS +.macro START_FIELDS + _FIELD_OFFSET = 0 + _STRUCT_ALIGN = 0 +.endm + +# FIELD name size align +.macro FIELD name size align + _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + \name = _FIELD_OFFSET + _FIELD_OFFSET = _FIELD_OFFSET + (\size) +.if (\align > _STRUCT_ALIGN) + _STRUCT_ALIGN = \align +.endif +.endm + +# END_FIELDS +.macro END_FIELDS + _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +.endm + +.macro STRUCT p1 +START_FIELDS +.struc \p1 +.endm + +.macro ENDSTRUCT + tmp = _FIELD_OFFSET + END_FIELDS + tmp = (_FIELD_OFFSET - ##tmp) +.if (tmp > 0) + .lcomm tmp +.endm + +## RES_int name size align +.macro RES_int p1 p2 p3 + name = \p1 + size = \p2 + align = .\p3 + + _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) +.align align +.lcomm name size + _FIELD_OFFSET = _FIELD_OFFSET + (size) +.if (align > _STRUCT_ALIGN) + _STRUCT_ALIGN = align +.endif +.endm + +# macro RES_B name, size [, align] +.macro RES_B _name, _size, _align=1 +RES_int _name _size _align +.endm + +# macro RES_W name, size [, align] +.macro RES_W _name, _size, _align=2 +RES_int _name 2*(_size) _align +.endm + +# macro RES_D name, size [, align] +.macro RES_D _name, _size, _align=4 +RES_int _name 4*(_size) _align +.endm + +# macro RES_Q name, size [, align] +.macro RES_Q _name, _size, _align=8 +RES_int _name 8*(_size) _align +.endm + +# macro RES_DQ name, size [, align] +.macro RES_DQ _name, _size, _align=16 +RES_int _name 16*(_size) _align +.endm + +# macro RES_Y name, size [, align] +.macro RES_Y _name, _size, _align=32 +RES_int _name 32*(_size) _align +.endm + +# macro RES_Z name, size [, align] +.macro RES_Z _name, _size, _align=64 +RES_int _name 64*(_size) _align +.endm + +#endif + +################################################################### +### Define SHA512 Out Of Order Data Structures +################################################################### + +START_FIELDS # LANE_DATA +### name size align +FIELD _job_in_lane, 8, 8 # pointer to job object +END_FIELDS + + _LANE_DATA_size = _FIELD_OFFSET + _LANE_DATA_align = _STRUCT_ALIGN + +#################################################################### + +START_FIELDS # SHA512_ARGS_X4 +### name size align +FIELD _digest, 8*8*4, 4 # transposed digest +FIELD _data_ptr, 8*4, 8 # array of pointers to data +END_FIELDS + + _SHA512_ARGS_X4_size = _FIELD_OFFSET + _SHA512_ARGS_X4_align = _STRUCT_ALIGN + +##################################################################### + +START_FIELDS # MB_MGR +### name size align +FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align +FIELD _lens, 8*4, 8 +FIELD _unused_lanes, 8, 8 +FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align +END_FIELDS + + _MB_MGR_size = _FIELD_OFFSET + _MB_MGR_align = _STRUCT_ALIGN + +_args_digest = _args + _digest +_args_data_ptr = _args + _data_ptr + +####################################################################### + +####################################################################### +#### Define constants +####################################################################### + +#define STS_UNKNOWN 0 +#define STS_BEING_PROCESSED 1 +#define STS_COMPLETED 2 + +####################################################################### +#### Define JOB_SHA512 structure +####################################################################### + +START_FIELDS # JOB_SHA512 +### name size align +FIELD _buffer, 8, 8 # pointer to buffer +FIELD _len, 8, 8 # length in bytes +FIELD _result_digest, 8*8, 32 # Digest (output) +FIELD _status, 4, 4 +FIELD _user_data, 8, 8 +END_FIELDS + + _JOB_SHA512_size = _FIELD_OFFSET + _JOB_SHA512_align = _STRUCT_ALIGN From bee5cfd9f6702fa93f183a289dda798042bde489 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 27 Jun 2016 10:20:08 -0700 Subject: [PATCH 103/180] crypto: sha512-mb - Crypto computation (x4 AVX2) This patch introduces the assembly routines to do SHA512 computation on buffers belonging to several jobs at once. The assembly routines are optimized with AVX2 instructions that have 4 data lanes and using AVX2 registers. Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_x4_avx2.S | 529 +++++++++++++++++++++ 1 file changed, 529 insertions(+) create mode 100644 arch/x86/crypto/sha512-mb/sha512_x4_avx2.S diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S new file mode 100644 index 0000000000000..31ab1eff64133 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S @@ -0,0 +1,529 @@ +/* + * Multi-buffer SHA512 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# code to compute quad SHA512 using AVX2 +# use YMMs to tackle the larger digest size +# outer calling routine takes care of save and restore of XMM registers +# Logic designed/laid out by JDG + +# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 +# Stack must be aligned to 32 bytes before call +# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 +# Linux preserves: rcx rdx rdi rbp r13 r14 r15 +# clobbers ymm0-15 + +#include +#include "sha512_mb_mgr_datastruct.S" + +arg1 = %rdi +arg2 = %rsi + +# Common definitions +STATE = arg1 +INP_SIZE = arg2 + +IDX = %rax +ROUND = %rbx +TBL = %r8 + +inp0 = %r9 +inp1 = %r10 +inp2 = %r11 +inp3 = %r12 + +a = %ymm0 +b = %ymm1 +c = %ymm2 +d = %ymm3 +e = %ymm4 +f = %ymm5 +g = %ymm6 +h = %ymm7 + +a0 = %ymm8 +a1 = %ymm9 +a2 = %ymm10 + +TT0 = %ymm14 +TT1 = %ymm13 +TT2 = %ymm12 +TT3 = %ymm11 +TT4 = %ymm10 +TT5 = %ymm9 + +T1 = %ymm14 +TMP = %ymm15 + +# Define stack usage +STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 + +#define VMOVPD vmovupd +_digest = SZ4*16 + +# transpose r0, r1, r2, r3, t0, t1 +# "transpose" data in {r0..r3} using temps {t0..t3} +# Input looks like: {r0 r1 r2 r3} +# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} +# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} +# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} +# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} +# +# output looks like: {t0 r1 r0 r3} +# t0 = {d1 d0 c1 c0 b1 b0 a1 a0} +# r1 = {d3 d2 c3 c2 b3 b2 a3 a2} +# r0 = {d5 d4 c5 c4 b5 b4 a5 a4} +# r3 = {d7 d6 c7 c6 b7 b6 a7 a6} + +.macro TRANSPOSE r0 r1 r2 r3 t0 t1 + vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} + vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} + vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} + vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} + + vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 + vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 + vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 + vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 +.endm + +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +# PRORQ reg, imm, tmp +# packed-rotate-right-double +# does a rotate by doing two shifts and an or +.macro _PRORQ reg imm tmp + vpsllq $(64-\imm),\reg,\tmp + vpsrlq $\imm,\reg, \reg + vpor \tmp,\reg, \reg +.endm + +# non-destructive +# PRORQ_nd reg, imm, tmp, src +.macro _PRORQ_nd reg imm tmp src + vpsllq $(64-\imm), \src, \tmp + vpsrlq $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +# PRORQ dst/src, amt +.macro PRORQ reg imm + _PRORQ \reg, \imm, TMP +.endm + +# PRORQ_nd dst, src, amt +.macro PRORQ_nd reg tmp imm + _PRORQ_nd \reg, \imm, TMP, \tmp +.endm + +#; arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_00_15 _T1 i + PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) + + vpxor g, f, a2 # ch: a2 = f^g + vpand e,a2, a2 # ch: a2 = (f^g)&e + vpxor g, a2, a2 # a2 = ch + + PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) + + offset = SZ4*(\i & 0xf) + vmovdqu \_T1,offset(%rsp) + vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K + vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) + PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddq a2, h, h # h = h + ch + PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) + vpaddq \_T1,h, h # h = h + ch + W + K + vpxor a1, a0, a0 # a0 = sigma1 + vmovdqu a,\_T1 + PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) + vpxor c, \_T1, \_T1 # maj: T1 = a^c + add $SZ4, ROUND # ROUND++ + vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b + vpaddq a0, h, h + vpaddq h, d, d + vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) + PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a1, a2, a2 # a2 = sig0 + vpand c, a, a1 # maj: a1 = a&c + vpor \_T1, a1, a1 # a1 = maj + vpaddq a1, h, h # h = h + ch + W + K + maj + vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 + ROTATE_ARGS +.endm + + +#; arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_16_XX _T1 i + vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 + vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 + vmovdqu \_T1, a0 + PRORQ \_T1,7 + vmovdqu a1, a2 + PRORQ a1,42 + vpxor a0, \_T1, \_T1 + PRORQ \_T1, 1 + vpxor a2, a1, a1 + PRORQ a1, 19 + vpsrlq $7, a0, a0 + vpxor a0, \_T1, \_T1 + vpsrlq $6, a2, a2 + vpxor a2, a1, a1 + vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 + vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 + vpaddq a1, \_T1, \_T1 + + ROUND_00_15 \_T1,\i +.endm + + +# void sha512_x4_avx2(void *STATE, const int INP_SIZE) +# arg 1 : STATE : pointer to input data +# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) +ENTRY(sha512_x4_avx2) + # general registers preserved in outer calling routine + # outer calling routine saves all the XMM registers + # save callee-saved clobbered registers to comply with C function ABI + push %r12 + push %r13 + push %r14 + push %r15 + + sub $STACK_SPACE1, %rsp + + # Load the pre-transposed incoming digest. + vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a + vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b + vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c + vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d + vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e + vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f + vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g + vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h + + lea K512_4(%rip),TBL + + # load the address of each of the 4 message lanes + # getting ready to transpose input onto stack + mov _data_ptr+0*PTR_SZ(STATE),inp0 + mov _data_ptr+1*PTR_SZ(STATE),inp1 + mov _data_ptr+2*PTR_SZ(STATE),inp2 + mov _data_ptr+3*PTR_SZ(STATE),inp3 + + xor IDX, IDX +lloop: + xor ROUND, ROUND + + # save old digest + vmovdqu a, _digest(%rsp) + vmovdqu b, _digest+1*SZ4(%rsp) + vmovdqu c, _digest+2*SZ4(%rsp) + vmovdqu d, _digest+3*SZ4(%rsp) + vmovdqu e, _digest+4*SZ4(%rsp) + vmovdqu f, _digest+5*SZ4(%rsp) + vmovdqu g, _digest+6*SZ4(%rsp) + vmovdqu h, _digest+7*SZ4(%rsp) + i = 0 +.rep 4 + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP + VMOVPD i*32(inp0, IDX), TT2 + VMOVPD i*32(inp1, IDX), TT1 + VMOVPD i*32(inp2, IDX), TT4 + VMOVPD i*32(inp3, IDX), TT3 + TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 + vpshufb TMP, TT0, TT0 + vpshufb TMP, TT1, TT1 + vpshufb TMP, TT2, TT2 + vpshufb TMP, TT3, TT3 + ROUND_00_15 TT0,(i*4+0) + ROUND_00_15 TT1,(i*4+1) + ROUND_00_15 TT2,(i*4+2) + ROUND_00_15 TT3,(i*4+3) + i = (i+1) +.endr + add $128, IDX + + i = (i*4) + + jmp Lrounds_16_xx +.align 16 +Lrounds_16_xx: +.rep 16 + ROUND_16_XX T1, i + i = (i+1) +.endr + cmp $0xa00,ROUND + jb Lrounds_16_xx + + # add old digest + vpaddq _digest(%rsp), a, a + vpaddq _digest+1*SZ4(%rsp), b, b + vpaddq _digest+2*SZ4(%rsp), c, c + vpaddq _digest+3*SZ4(%rsp), d, d + vpaddq _digest+4*SZ4(%rsp), e, e + vpaddq _digest+5*SZ4(%rsp), f, f + vpaddq _digest+6*SZ4(%rsp), g, g + vpaddq _digest+7*SZ4(%rsp), h, h + + sub $1, INP_SIZE # unit is blocks + jne lloop + + # write back to memory (state object) the transposed digest + vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) + + # update input data pointers + add IDX, inp0 + mov inp0, _data_ptr+0*PTR_SZ(STATE) + add IDX, inp1 + mov inp1, _data_ptr+1*PTR_SZ(STATE) + add IDX, inp2 + mov inp2, _data_ptr+2*PTR_SZ(STATE) + add IDX, inp3 + mov inp3, _data_ptr+3*PTR_SZ(STATE) + + #;;;;;;;;;;;;;;; + #; Postamble + add $STACK_SPACE1, %rsp + # restore callee-saved clobbered registers + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + + # outer calling routine restores XMM and other GP registers + ret +ENDPROC(sha512_x4_avx2) + +.data +.align 64 +K512_4: + .octa 0x428a2f98d728ae22428a2f98d728ae22,\ + 0x428a2f98d728ae22428a2f98d728ae22 + .octa 0x7137449123ef65cd7137449123ef65cd,\ + 0x7137449123ef65cd7137449123ef65cd + .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ + 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f + .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ + 0xe9b5dba58189dbbce9b5dba58189dbbc + .octa 0x3956c25bf348b5383956c25bf348b538,\ + 0x3956c25bf348b5383956c25bf348b538 + .octa 0x59f111f1b605d01959f111f1b605d019,\ + 0x59f111f1b605d01959f111f1b605d019 + .octa 0x923f82a4af194f9b923f82a4af194f9b,\ + 0x923f82a4af194f9b923f82a4af194f9b + .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ + 0xab1c5ed5da6d8118ab1c5ed5da6d8118 + .octa 0xd807aa98a3030242d807aa98a3030242,\ + 0xd807aa98a3030242d807aa98a3030242 + .octa 0x12835b0145706fbe12835b0145706fbe,\ + 0x12835b0145706fbe12835b0145706fbe + .octa 0x243185be4ee4b28c243185be4ee4b28c,\ + 0x243185be4ee4b28c243185be4ee4b28c + .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ + 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 + .octa 0x72be5d74f27b896f72be5d74f27b896f,\ + 0x72be5d74f27b896f72be5d74f27b896f + .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ + 0x80deb1fe3b1696b180deb1fe3b1696b1 + .octa 0x9bdc06a725c712359bdc06a725c71235,\ + 0x9bdc06a725c712359bdc06a725c71235 + .octa 0xc19bf174cf692694c19bf174cf692694,\ + 0xc19bf174cf692694c19bf174cf692694 + .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ + 0xe49b69c19ef14ad2e49b69c19ef14ad2 + .octa 0xefbe4786384f25e3efbe4786384f25e3,\ + 0xefbe4786384f25e3efbe4786384f25e3 + .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ + 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 + .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ + 0x240ca1cc77ac9c65240ca1cc77ac9c65 + .octa 0x2de92c6f592b02752de92c6f592b0275,\ + 0x2de92c6f592b02752de92c6f592b0275 + .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ + 0x4a7484aa6ea6e4834a7484aa6ea6e483 + .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ + 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 + .octa 0x76f988da831153b576f988da831153b5,\ + 0x76f988da831153b576f988da831153b5 + .octa 0x983e5152ee66dfab983e5152ee66dfab,\ + 0x983e5152ee66dfab983e5152ee66dfab + .octa 0xa831c66d2db43210a831c66d2db43210,\ + 0xa831c66d2db43210a831c66d2db43210 + .octa 0xb00327c898fb213fb00327c898fb213f,\ + 0xb00327c898fb213fb00327c898fb213f + .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ + 0xbf597fc7beef0ee4bf597fc7beef0ee4 + .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ + 0xc6e00bf33da88fc2c6e00bf33da88fc2 + .octa 0xd5a79147930aa725d5a79147930aa725,\ + 0xd5a79147930aa725d5a79147930aa725 + .octa 0x06ca6351e003826f06ca6351e003826f,\ + 0x06ca6351e003826f06ca6351e003826f + .octa 0x142929670a0e6e70142929670a0e6e70,\ + 0x142929670a0e6e70142929670a0e6e70 + .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ + 0x27b70a8546d22ffc27b70a8546d22ffc + .octa 0x2e1b21385c26c9262e1b21385c26c926,\ + 0x2e1b21385c26c9262e1b21385c26c926 + .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ + 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed + .octa 0x53380d139d95b3df53380d139d95b3df,\ + 0x53380d139d95b3df53380d139d95b3df + .octa 0x650a73548baf63de650a73548baf63de,\ + 0x650a73548baf63de650a73548baf63de + .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ + 0x766a0abb3c77b2a8766a0abb3c77b2a8 + .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ + 0x81c2c92e47edaee681c2c92e47edaee6 + .octa 0x92722c851482353b92722c851482353b,\ + 0x92722c851482353b92722c851482353b + .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ + 0xa2bfe8a14cf10364a2bfe8a14cf10364 + .octa 0xa81a664bbc423001a81a664bbc423001,\ + 0xa81a664bbc423001a81a664bbc423001 + .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ + 0xc24b8b70d0f89791c24b8b70d0f89791 + .octa 0xc76c51a30654be30c76c51a30654be30,\ + 0xc76c51a30654be30c76c51a30654be30 + .octa 0xd192e819d6ef5218d192e819d6ef5218,\ + 0xd192e819d6ef5218d192e819d6ef5218 + .octa 0xd69906245565a910d69906245565a910,\ + 0xd69906245565a910d69906245565a910 + .octa 0xf40e35855771202af40e35855771202a,\ + 0xf40e35855771202af40e35855771202a + .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ + 0x106aa07032bbd1b8106aa07032bbd1b8 + .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ + 0x19a4c116b8d2d0c819a4c116b8d2d0c8 + .octa 0x1e376c085141ab531e376c085141ab53,\ + 0x1e376c085141ab531e376c085141ab53 + .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ + 0x2748774cdf8eeb992748774cdf8eeb99 + .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ + 0x34b0bcb5e19b48a834b0bcb5e19b48a8 + .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ + 0x391c0cb3c5c95a63391c0cb3c5c95a63 + .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ + 0x4ed8aa4ae3418acb4ed8aa4ae3418acb + .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ + 0x5b9cca4f7763e3735b9cca4f7763e373 + .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ + 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 + .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\ + 0x748f82ee5defb2fc748f82ee5defb2fc + .octa 0x78a5636f43172f6078a5636f43172f60,\ + 0x78a5636f43172f6078a5636f43172f60 + .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ + 0x84c87814a1f0ab7284c87814a1f0ab72 + .octa 0x8cc702081a6439ec8cc702081a6439ec,\ + 0x8cc702081a6439ec8cc702081a6439ec + .octa 0x90befffa23631e2890befffa23631e28,\ + 0x90befffa23631e2890befffa23631e28 + .octa 0xa4506cebde82bde9a4506cebde82bde9,\ + 0xa4506cebde82bde9a4506cebde82bde9 + .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ + 0xbef9a3f7b2c67915bef9a3f7b2c67915 + .octa 0xc67178f2e372532bc67178f2e372532b,\ + 0xc67178f2e372532bc67178f2e372532b + .octa 0xca273eceea26619cca273eceea26619c,\ + 0xca273eceea26619cca273eceea26619c + .octa 0xd186b8c721c0c207d186b8c721c0c207,\ + 0xd186b8c721c0c207d186b8c721c0c207 + .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ + 0xeada7dd6cde0eb1eeada7dd6cde0eb1e + .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ + 0xf57d4f7fee6ed178f57d4f7fee6ed178 + .octa 0x06f067aa72176fba06f067aa72176fba,\ + 0x06f067aa72176fba06f067aa72176fba + .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ + 0x0a637dc5a2c898a60a637dc5a2c898a6 + .octa 0x113f9804bef90dae113f9804bef90dae,\ + 0x113f9804bef90dae113f9804bef90dae + .octa 0x1b710b35131c471b1b710b35131c471b,\ + 0x1b710b35131c471b1b710b35131c471b + .octa 0x28db77f523047d8428db77f523047d84,\ + 0x28db77f523047d8428db77f523047d84 + .octa 0x32caab7b40c7249332caab7b40c72493,\ + 0x32caab7b40c7249332caab7b40c72493 + .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ + 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc + .octa 0x431d67c49c100d4c431d67c49c100d4c,\ + 0x431d67c49c100d4c431d67c49c100d4c + .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ + 0x4cc5d4becb3e42b64cc5d4becb3e42b6 + .octa 0x597f299cfc657e2a597f299cfc657e2a,\ + 0x597f299cfc657e2a597f299cfc657e2a + .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ + 0x5fcb6fab3ad6faec5fcb6fab3ad6faec + .octa 0x6c44198c4a4758176c44198c4a475817,\ + 0x6c44198c4a4758176c44198c4a475817 + +PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 + .octa 0x18191a1b1c1d1e1f1011121314151617 From 14009c4bde13d191206d58d0513a9179568a0125 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 27 Jun 2016 10:20:09 -0700 Subject: [PATCH 104/180] crypto: tcrypt - Add new mode for sha512_mb Add a new mode to calculate the speed of the sha512_mb algorithm Signed-off-by: Megha Dey Reviewed-by: Fenghua Yu Reviewed-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 6ef78157a0ab0..158c164cbddf9 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1939,6 +1939,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_mb_ahash_speed("sha256", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; + case 424: + test_mb_ahash_speed("sha512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + case 499: break; From f83f5b12ee674c187f329ef8eb9352dc45dcb25f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jun 2016 09:23:06 +0200 Subject: [PATCH 105/180] crypto: tcrypt - Fix mixing printk/pr_err and obvious indentation issues The recently added test_mb_ahash_speed() has clearly serious coding style issues. Try to fix some of them: 1. Don't mix pr_err() and printk(); 2. Don't wrap strings; 3. Properly align goto statement in if() block; 4. Align wrapped arguments on new line; 5. Don't wrap functions on first argument; Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 158c164cbddf9..8893ba5321b5e 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -608,12 +608,12 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, req[i] = ahash_request_alloc(tfm, GFP_KERNEL); if (!req[i]) { - printk(KERN_ERR "alg: hash: Failed to allocate " - "request for %s\n", algo); + pr_err("alg: hash: Failed to allocate request for %s\n", + algo); goto out_noreq; } ahash_request_set_callback(req[i], CRYPTO_TFM_REQ_MAY_BACKLOG, - tcrypt_complete, &tresult[i]); + tcrypt_complete, &tresult[i]); hash_buff = xbuf[i][0]; memcpy(hash_buff, ptext, 4096); @@ -621,15 +621,14 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, j = 0; - printk(KERN_INFO "\ntesting speed of %s (%s)\n", algo, - get_driver_name(crypto_ahash, tfm)); + pr_err("\ntesting speed of %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); for (i = 0; speed[i].blen != 0; i++) { if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { - printk(KERN_ERR - "template (%u) too big for tvmem (%lu)\n", - speed[i].blen, TVMEMSIZE * PAGE_SIZE); - goto out; + pr_err("template (%u) too big for tvmem (%lu)\n", + speed[i].blen, TVMEMSIZE * PAGE_SIZE); + goto out; } if (speed[i].klen) @@ -637,13 +636,12 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, for (k = 0; k < 8; ++k) { sg_init_one(&sg[k][0], (void *) xbuf[k][0], - speed[i].blen); + speed[i].blen); ahash_request_set_crypt(req[k], sg[k], result[k], speed[i].blen); } - printk(KERN_INFO "test%3u " - "(%5u byte blocks,%5u bytes per update,%4u updates): ", + pr_err("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); @@ -653,9 +651,8 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, if (ret == -EBUSY || ret == -EINPROGRESS) continue; if (ret) { - printk(KERN_ERR - "alg (%s) something wrong, ret = %d ...\n", - algo, ret); + pr_err("alg (%s) something wrong, ret = %d ...\n", + algo, ret); goto out; } } @@ -664,11 +661,9 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, for (k = 0; k < 8; ++k) { struct tcrypt_result *tr = &tresult[k]; - ret = wait_for_completion_interruptible - (&tr->completion); + ret = wait_for_completion_interruptible(&tr->completion); if (ret) - printk(KERN_ERR - "alg(%s): hash: digest failed\n", algo); + pr_err("alg(%s): hash: digest failed\n", algo); end[k] = get_cycles(); } From f8de55b6252b30f9bc1dcb62964f7b49e00983fa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Jun 2016 16:41:38 +0800 Subject: [PATCH 106/180] crypto: tcrypt - Use unsigned long for mb ahash cycle counter For the timescales we are working against there is no need to go beyond unsigned long. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 8893ba5321b5e..59b7327226780 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -584,7 +584,7 @@ char result[8][64]; struct ahash_request *req[8]; struct tcrypt_result tresult[8]; char *xbuf[8][XBUFSIZE]; -cycles_t start[8], end[8], mid; +unsigned long start[8], end[8], mid; static void test_mb_ahash_speed(const char *algo, unsigned int sec, struct hash_speed *speed) @@ -593,6 +593,7 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, void *hash_buff; int ret = -ENOMEM; struct crypto_ahash *tfm; + unsigned long cycles; tfm = crypto_alloc_ahash(algo, 0, 0); if (IS_ERR(tfm)) { @@ -667,10 +668,9 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, end[k] = get_cycles(); } - printk("\nBlock: %lld cycles (%lld cycles/byte), %d bytes\n", - (s64) (end[7]-start[0])/1, - (s64) (end[7]-start[0])/(8*speed[i].blen), - 8*speed[i].blen); + cycles = end[7] - start[0]; + printk("\nBlock: %6lu cycles (%4lu cycles/byte)\n", + cycles, cycles / (8 * speed[i].blen)); } ret = 0; From 72259deb3a9f2c07d18d71d7c9356754e7d88369 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Jun 2016 20:33:52 +0800 Subject: [PATCH 107/180] crypto: tcrypt - Fix memory leaks/crashes in multibuffer hash speed test This patch resolves a number of issues with the mb speed test function: * The tfm is never freed. * Memory is allocated even when we're not using mb. * When an error occurs we don't wait for completion for other requests. * When an error occurs during allocation we may leak memory. * The test function ignores plen but still runs for plen != blen. * The backlog flag is incorrectly used (may crash). This patch tries to resolve all these issues as well as making the code consistent with the existing hash speed testing function. Signed-off-by: Herbert Xu Tested-by: Krzysztof Kozlowski --- crypto/tcrypt.c | 129 ++++++++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 58 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 59b7327226780..8a91dc34610e9 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -578,54 +578,61 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) return ret; } -char ptext[4096]; -struct scatterlist sg[8][8]; -char result[8][64]; -struct ahash_request *req[8]; -struct tcrypt_result tresult[8]; -char *xbuf[8][XBUFSIZE]; -unsigned long start[8], end[8], mid; +struct test_mb_ahash_data { + struct scatterlist sg[TVMEMSIZE]; + char result[64]; + struct ahash_request *req; + struct tcrypt_result tresult; + char *xbuf[XBUFSIZE]; +}; static void test_mb_ahash_speed(const char *algo, unsigned int sec, - struct hash_speed *speed) + struct hash_speed *speed) { - unsigned int i, j, k; - void *hash_buff; - int ret = -ENOMEM; + struct test_mb_ahash_data *data; struct crypto_ahash *tfm; + unsigned long start, end; unsigned long cycles; + unsigned int i, j, k; + int ret; + + data = kzalloc(sizeof(*data) * 8, GFP_KERNEL); + if (!data) + return; tfm = crypto_alloc_ahash(algo, 0, 0); if (IS_ERR(tfm)) { pr_err("failed to load transform for %s: %ld\n", algo, PTR_ERR(tfm)); - return; + goto free_data; } + for (i = 0; i < 8; ++i) { - if (testmgr_alloc_buf(xbuf[i])) - goto out_nobuf; + if (testmgr_alloc_buf(data[i].xbuf)) + goto out; - init_completion(&tresult[i].completion); + init_completion(&data[i].tresult.completion); - req[i] = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req[i]) { + data[i].req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!data[i].req) { pr_err("alg: hash: Failed to allocate request for %s\n", algo); - goto out_noreq; + goto out; } - ahash_request_set_callback(req[i], CRYPTO_TFM_REQ_MAY_BACKLOG, - tcrypt_complete, &tresult[i]); - hash_buff = xbuf[i][0]; - memcpy(hash_buff, ptext, 4096); + ahash_request_set_callback(data[i].req, 0, + tcrypt_complete, &data[i].tresult); + test_hash_sg_init(data[i].sg); } - j = 0; - - pr_err("\ntesting speed of %s (%s)\n", algo, - get_driver_name(crypto_ahash, tfm)); + pr_info("\ntesting speed of multibuffer %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); for (i = 0; speed[i].blen != 0; i++) { + /* For some reason this only tests digests. */ + if (speed[i].blen != speed[i].plen) + continue; + if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { pr_err("template (%u) too big for tvmem (%lu)\n", speed[i].blen, TVMEMSIZE * PAGE_SIZE); @@ -635,53 +642,59 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, if (speed[i].klen) crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen); - for (k = 0; k < 8; ++k) { - sg_init_one(&sg[k][0], (void *) xbuf[k][0], - speed[i].blen); - ahash_request_set_crypt(req[k], sg[k], - result[k], speed[i].blen); - } + for (k = 0; k < 8; k++) + ahash_request_set_crypt(data[k].req, data[k].sg, + data[k].result, speed[i].blen); - pr_err("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ", + pr_info("test%3u " + "(%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); - for (k = 0; k < 8; ++k) { - start[k] = get_cycles(); - ret = crypto_ahash_digest(req[k]); - if (ret == -EBUSY || ret == -EINPROGRESS) + start = get_cycles(); + + for (k = 0; k < 8; k++) { + ret = crypto_ahash_digest(data[k].req); + if (ret == -EINPROGRESS) continue; - if (ret) { - pr_err("alg (%s) something wrong, ret = %d ...\n", - algo, ret); - goto out; - } + + if (ret) + break; + + complete(&data[k].tresult.completion); + data[k].tresult.err = 0; } - mid = get_cycles(); - for (k = 0; k < 8; ++k) { - struct tcrypt_result *tr = &tresult[k]; + for (j = 0; j < k; j++) { + struct tcrypt_result *tr = &data[j].tresult; - ret = wait_for_completion_interruptible(&tr->completion); - if (ret) - pr_err("alg(%s): hash: digest failed\n", algo); - end[k] = get_cycles(); + wait_for_completion(&tr->completion); + if (tr->err) + ret = tr->err; } - cycles = end[7] - start[0]; - printk("\nBlock: %6lu cycles (%4lu cycles/byte)\n", - cycles, cycles / (8 * speed[i].blen)); + end = get_cycles(); + cycles = end - start; + pr_cont("%6lu cycles/operation, %4lu cycles/byte\n", + cycles, cycles / (8 * speed[i].blen)); + + if (ret) { + pr_err("At least one hashing failed ret=%d\n", ret); + break; + } } - ret = 0; out: for (k = 0; k < 8; ++k) - ahash_request_free(req[k]); -out_noreq: + ahash_request_free(data[k].req); + for (k = 0; k < 8; ++k) - testmgr_free_buf(xbuf[k]); -out_nobuf: - return; + testmgr_free_buf(data[k].xbuf); + + crypto_free_ahash(tfm); + +free_data: + kfree(data); } static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, From c34252fd712c10da3a0b018b3b5dbe552ac1e7b9 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Wed, 29 Jun 2016 00:24:43 +0530 Subject: [PATCH 108/180] crypto: authenc - Remove redundant sg_init_table call. Remove redundant sg_init_table call. scatterwalk_ffwd doing the same. Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- crypto/authenc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index 55a354d572513..c7cc11d364805 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -206,7 +206,6 @@ static int crypto_authenc_encrypt(struct aead_request *req) struct scatterlist *src, *dst; int err; - sg_init_table(areq_ctx->src, 2); src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; @@ -215,7 +214,6 @@ static int crypto_authenc_encrypt(struct aead_request *req) if (err) return err; - sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); } @@ -251,14 +249,11 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, if (crypto_memneq(ihash, ahreq->result, authsize)) return -EBADMSG; - sg_init_table(areq_ctx->src, 2); src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; - if (req->src != req->dst) { - sg_init_table(areq_ctx->dst, 2); + if (req->src != req->dst) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); - } ablkcipher_request_set_tfm(abreq, ctx->enc); ablkcipher_request_set_callback(abreq, aead_request_flags(req), From 927ef32dccfe6e048a163138b8e714b2d944194d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:03:46 +0800 Subject: [PATCH 109/180] crypto: authenc - Consider ahash ASYNC bit As it is, if you get an async ahash with a sync skcipher you'll end up with a sync authenc, which is wrong. This patch fixes it by considering the ASYNC bit from ahash as well. It also fixes a little bug where if a sync version of authenc is requested we may still end up using an async ahash. Neither of them should have any effect as none of the authenc users can request for a sync authenc. Signed-off-by: Herbert Xu --- crypto/authenc.c | 6 ++++-- crypto/authencesn.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index c7cc11d364805..309fbc17222d0 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -392,7 +392,8 @@ static int crypto_authenc_create(struct crypto_template *tmpl, return -EINVAL; auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, algt->mask)); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -438,7 +439,8 @@ static int crypto_authenc_create(struct crypto_template *tmpl, enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; - inst->alg.base.cra_flags = enc->cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_flags = (auth_base->cra_flags | enc->cra_flags) & + CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = enc->cra_priority * 10 + auth_base->cra_priority; inst->alg.base.cra_blocksize = enc->cra_blocksize; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 0c0468869e25b..0662b1848c5d8 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -413,7 +413,8 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, return -EINVAL; auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, algt->mask)); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -456,7 +457,8 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; - inst->alg.base.cra_flags = enc->cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_flags = (auth_base->cra_flags | enc->cra_flags) & + CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = enc->cra_priority * 10 + auth_base->cra_priority; inst->alg.base.cra_blocksize = enc->cra_blocksize; From 2495cf25f60e085b35beb9b215235dfe1ca4f200 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:03:47 +0800 Subject: [PATCH 110/180] crypto: ahash - Add padding in crypto_ahash_extsize The function crypto_ahash_extsize did not include padding when computing the tfm context size. This patch fixes this by using the generic crypto_alg_extsize helper. Signed-off-by: Herbert Xu --- crypto/ahash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 3887a98abcc3c..2ce8bcb9049c8 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -461,10 +461,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) static unsigned int crypto_ahash_extsize(struct crypto_alg *alg) { - if (alg->cra_type == &crypto_ahash_type) - return alg->cra_ctxsize; + if (alg->cra_type != &crypto_ahash_type) + return sizeof(struct crypto_shash *); - return sizeof(struct crypto_shash *); + return crypto_alg_extsize(alg); } #ifdef CONFIG_NET From 7166e589da5b681e1f8ff2d1c491d7d71e9f7671 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:03:50 +0800 Subject: [PATCH 111/180] crypto: tcrypt - Use skcipher This patch converts tcrypt to use the new skcipher interface as opposed to ablkcipher/blkcipher. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 241 +++++++++--------------------------------------- 1 file changed, 44 insertions(+), 197 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 8a91dc34610e9..68064fc9c8ee3 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -92,76 +93,6 @@ static void tcrypt_complete(struct crypto_async_request *req, int err) complete(&res->completion); } -static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, - struct scatterlist *sg, int blen, int secs) -{ - unsigned long start, end; - int bcount; - int ret; - - for (start = jiffies, end = start + secs * HZ, bcount = 0; - time_before(jiffies, end); bcount++) { - if (enc) - ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); - else - ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); - - if (ret) - return ret; - } - - printk("%d operations in %d seconds (%ld bytes)\n", - bcount, secs, (long)bcount * blen); - return 0; -} - -static int test_cipher_cycles(struct blkcipher_desc *desc, int enc, - struct scatterlist *sg, int blen) -{ - unsigned long cycles = 0; - int ret = 0; - int i; - - local_irq_disable(); - - /* Warm-up run. */ - for (i = 0; i < 4; i++) { - if (enc) - ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); - else - ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); - - if (ret) - goto out; - } - - /* The real thing. */ - for (i = 0; i < 8; i++) { - cycles_t start, end; - - start = get_cycles(); - if (enc) - ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); - else - ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); - end = get_cycles(); - - if (ret) - goto out; - - cycles += end - start; - } - -out: - local_irq_enable(); - - if (ret == 0) - printk("1 operation in %lu cycles (%d bytes)\n", - (cycles + 4) / 8, blen); - - return ret; -} - static inline int do_one_aead_op(struct aead_request *req, int ret) { if (ret == -EINPROGRESS || ret == -EBUSY) { @@ -455,106 +386,6 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, return; } -static void test_cipher_speed(const char *algo, int enc, unsigned int secs, - struct cipher_speed_template *template, - unsigned int tcount, u8 *keysize) -{ - unsigned int ret, i, j, iv_len; - const char *key; - char iv[128]; - struct crypto_blkcipher *tfm; - struct blkcipher_desc desc; - const char *e; - u32 *b_size; - - if (enc == ENCRYPT) - e = "encryption"; - else - e = "decryption"; - - tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC); - - if (IS_ERR(tfm)) { - printk("failed to load transform for %s: %ld\n", algo, - PTR_ERR(tfm)); - return; - } - desc.tfm = tfm; - desc.flags = 0; - - printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, - get_driver_name(crypto_blkcipher, tfm), e); - - i = 0; - do { - - b_size = block_sizes; - do { - struct scatterlist sg[TVMEMSIZE]; - - if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { - printk("template (%u) too big for " - "tvmem (%lu)\n", *keysize + *b_size, - TVMEMSIZE * PAGE_SIZE); - goto out; - } - - printk("test %u (%d bit key, %d byte blocks): ", i, - *keysize * 8, *b_size); - - memset(tvmem[0], 0xff, PAGE_SIZE); - - /* set key, plain text and IV */ - key = tvmem[0]; - for (j = 0; j < tcount; j++) { - if (template[j].klen == *keysize) { - key = template[j].key; - break; - } - } - - ret = crypto_blkcipher_setkey(tfm, key, *keysize); - if (ret) { - printk("setkey() failed flags=%x\n", - crypto_blkcipher_get_flags(tfm)); - goto out; - } - - sg_init_table(sg, TVMEMSIZE); - sg_set_buf(sg, tvmem[0] + *keysize, - PAGE_SIZE - *keysize); - for (j = 1; j < TVMEMSIZE; j++) { - sg_set_buf(sg + j, tvmem[j], PAGE_SIZE); - memset (tvmem[j], 0xff, PAGE_SIZE); - } - - iv_len = crypto_blkcipher_ivsize(tfm); - if (iv_len) { - memset(&iv, 0xff, iv_len); - crypto_blkcipher_set_iv(tfm, iv, iv_len); - } - - if (secs) - ret = test_cipher_jiffies(&desc, enc, sg, - *b_size, secs); - else - ret = test_cipher_cycles(&desc, enc, sg, - *b_size); - - if (ret) { - printk("%s() failed flags=%x\n", e, desc.flags); - break; - } - b_size++; - i++; - } while (*b_size); - keysize++; - } while (*keysize); - -out: - crypto_free_blkcipher(tfm); -} - static void test_hash_sg_init(struct scatterlist *sg) { int i; @@ -932,7 +763,7 @@ static void test_hash_speed(const char *algo, unsigned int secs, return test_ahash_speed_common(algo, secs, speed, CRYPTO_ALG_ASYNC); } -static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) +static inline int do_one_acipher_op(struct skcipher_request *req, int ret) { if (ret == -EINPROGRESS || ret == -EBUSY) { struct tcrypt_result *tr = req->base.data; @@ -945,7 +776,7 @@ static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) return ret; } -static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, +static int test_acipher_jiffies(struct skcipher_request *req, int enc, int blen, int secs) { unsigned long start, end; @@ -956,10 +787,10 @@ static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, time_before(jiffies, end); bcount++) { if (enc) ret = do_one_acipher_op(req, - crypto_ablkcipher_encrypt(req)); + crypto_skcipher_encrypt(req)); else ret = do_one_acipher_op(req, - crypto_ablkcipher_decrypt(req)); + crypto_skcipher_decrypt(req)); if (ret) return ret; @@ -970,7 +801,7 @@ static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, return 0; } -static int test_acipher_cycles(struct ablkcipher_request *req, int enc, +static int test_acipher_cycles(struct skcipher_request *req, int enc, int blen) { unsigned long cycles = 0; @@ -981,10 +812,10 @@ static int test_acipher_cycles(struct ablkcipher_request *req, int enc, for (i = 0; i < 4; i++) { if (enc) ret = do_one_acipher_op(req, - crypto_ablkcipher_encrypt(req)); + crypto_skcipher_encrypt(req)); else ret = do_one_acipher_op(req, - crypto_ablkcipher_decrypt(req)); + crypto_skcipher_decrypt(req)); if (ret) goto out; @@ -997,10 +828,10 @@ static int test_acipher_cycles(struct ablkcipher_request *req, int enc, start = get_cycles(); if (enc) ret = do_one_acipher_op(req, - crypto_ablkcipher_encrypt(req)); + crypto_skcipher_encrypt(req)); else ret = do_one_acipher_op(req, - crypto_ablkcipher_decrypt(req)); + crypto_skcipher_decrypt(req)); end = get_cycles(); if (ret) @@ -1017,16 +848,16 @@ static int test_acipher_cycles(struct ablkcipher_request *req, int enc, return ret; } -static void test_acipher_speed(const char *algo, int enc, unsigned int secs, - struct cipher_speed_template *template, - unsigned int tcount, u8 *keysize) +static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, + struct cipher_speed_template *template, + unsigned int tcount, u8 *keysize, bool async) { unsigned int ret, i, j, k, iv_len; struct tcrypt_result tresult; const char *key; char iv[128]; - struct ablkcipher_request *req; - struct crypto_ablkcipher *tfm; + struct skcipher_request *req; + struct crypto_skcipher *tfm; const char *e; u32 *b_size; @@ -1037,7 +868,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, init_completion(&tresult.completion); - tfm = crypto_alloc_ablkcipher(algo, 0, 0); + tfm = crypto_alloc_skcipher(algo, 0, async ? 0 : CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { pr_err("failed to load transform for %s: %ld\n", algo, @@ -1046,17 +877,17 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, } pr_info("\ntesting speed of async %s (%s) %s\n", algo, - get_driver_name(crypto_ablkcipher, tfm), e); + get_driver_name(crypto_skcipher, tfm), e); - req = ablkcipher_request_alloc(tfm, GFP_KERNEL); + req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { pr_err("tcrypt: skcipher: Failed to allocate request for %s\n", algo); goto out; } - ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - tcrypt_complete, &tresult); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &tresult); i = 0; do { @@ -1086,12 +917,12 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, } } - crypto_ablkcipher_clear_flags(tfm, ~0); + crypto_skcipher_clear_flags(tfm, ~0); - ret = crypto_ablkcipher_setkey(tfm, key, *keysize); + ret = crypto_skcipher_setkey(tfm, key, *keysize); if (ret) { pr_err("setkey() failed flags=%x\n", - crypto_ablkcipher_get_flags(tfm)); + crypto_skcipher_get_flags(tfm)); goto out_free_req; } @@ -1115,11 +946,11 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, sg_set_buf(sg, tvmem[0] + *keysize, *b_size); } - iv_len = crypto_ablkcipher_ivsize(tfm); + iv_len = crypto_skcipher_ivsize(tfm); if (iv_len) memset(&iv, 0xff, iv_len); - ablkcipher_request_set_crypt(req, sg, sg, *b_size, iv); + skcipher_request_set_crypt(req, sg, sg, *b_size, iv); if (secs) ret = test_acipher_jiffies(req, enc, @@ -1130,7 +961,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, if (ret) { pr_err("%s() failed flags=%x\n", e, - crypto_ablkcipher_get_flags(tfm)); + crypto_skcipher_get_flags(tfm)); break; } b_size++; @@ -1140,9 +971,25 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, } while (*keysize); out_free_req: - ablkcipher_request_free(req); + skcipher_request_free(req); out: - crypto_free_ablkcipher(tfm); + crypto_free_skcipher(tfm); +} + +static void test_acipher_speed(const char *algo, int enc, unsigned int secs, + struct cipher_speed_template *template, + unsigned int tcount, u8 *keysize) +{ + return test_skcipher_speed(algo, enc, secs, template, tcount, keysize, + true); +} + +static void test_cipher_speed(const char *algo, int enc, unsigned int secs, + struct cipher_speed_template *template, + unsigned int tcount, u8 *keysize) +{ + return test_skcipher_speed(algo, enc, secs, template, tcount, keysize, + false); } static void test_available(void) From 02fa472afef2f619a16eb24117b90f9e7998c65f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:03:59 +0800 Subject: [PATCH 112/180] crypto: aesni - Use crypto_cipher to derive rfc4106 subkey Currently aesni uses an async ctr(aes) to derive the rfc4106 subkey, which was presumably copied over from the generic rfc4106 code. Over there it's done that way because we already have a ctr(aes) spawn. But it is simply overkill for aesni since we have to go get a ctr(aes) from scratch anyway. This patch simplifies the subkey derivation by using a straight aes cipher instead. Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 76 +++++------------------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 9e15572ef06d6..0ab5ee1c26af0 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -59,17 +59,6 @@ struct aesni_rfc4106_gcm_ctx { u8 nonce[4]; }; -struct aesni_gcm_set_hash_subkey_result { - int err; - struct completion completion; -}; - -struct aesni_hash_subkey_req_data { - u8 iv[16]; - struct aesni_gcm_set_hash_subkey_result result; - struct scatterlist sg; -}; - struct aesni_lrw_ctx { struct lrw_table_ctx lrw_table; u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; @@ -809,71 +798,28 @@ static void rfc4106_exit(struct crypto_aead *aead) cryptd_free_aead(*ctx); } -static void -rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) -{ - struct aesni_gcm_set_hash_subkey_result *result = req->data; - - if (err == -EINPROGRESS) - return; - result->err = err; - complete(&result->completion); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { - struct crypto_ablkcipher *ctr_tfm; - struct ablkcipher_request *req; - int ret = -EINVAL; - struct aesni_hash_subkey_req_data *req_data; + struct crypto_cipher *tfm; + int ret; - ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); - if (IS_ERR(ctr_tfm)) - return PTR_ERR(ctr_tfm); + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); - ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); + ret = crypto_cipher_setkey(tfm, key, key_len); if (ret) - goto out_free_ablkcipher; - - ret = -ENOMEM; - req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); - if (!req) - goto out_free_ablkcipher; - - req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); - if (!req_data) - goto out_free_request; - - memset(req_data->iv, 0, sizeof(req_data->iv)); + goto out_free_cipher; /* Clear the data in the hash sub key container to zero.*/ /* We want to cipher all zeros to create the hash sub key. */ memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); - init_completion(&req_data->result.completion); - sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); - ablkcipher_request_set_tfm(req, ctr_tfm); - ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - rfc4106_set_hash_subkey_done, - &req_data->result); - - ablkcipher_request_set_crypt(req, &req_data->sg, - &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); - - ret = crypto_ablkcipher_encrypt(req); - if (ret == -EINPROGRESS || ret == -EBUSY) { - ret = wait_for_completion_interruptible - (&req_data->result.completion); - if (!ret) - ret = req_data->result.err; - } - kfree(req_data); -out_free_request: - ablkcipher_request_free(req); -out_free_ablkcipher: - crypto_free_ablkcipher(ctr_tfm); + crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey); + +out_free_cipher: + crypto_free_cipher(tfm); return ret; } From 241118de58dace0fe24a754b6e2e3cb6f804ad47 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:01 +0800 Subject: [PATCH 113/180] crypto: ccp - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes-xts.c | 43 ++++++++++++------------- drivers/crypto/ccp/ccp-crypto.h | 3 +- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 0d0d4529ee360..58a4244b47529 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -14,9 +14,8 @@ #include #include #include -#include -#include #include +#include #include #include "ccp-crypto.h" @@ -110,15 +109,12 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); - return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, - key_len); + return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); } static int ccp_aes_xts_crypt(struct ablkcipher_request *req, unsigned int encrypt) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); unsigned int unit; @@ -146,14 +142,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); + /* Use the fallback to process the request for any * unsupported unit sizes or key sizes */ - ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); - ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); - + skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -192,23 +193,21 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *fallback_tfm; + struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; ctx->u.aes.key_len = 0; - fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback_tfm)) { - pr_warn("could not load fallback driver %s\n", - crypto_tfm_alg_name(tfm)); + pr_warn("could not load fallback driver xts(aes)\n"); return PTR_ERR(fallback_tfm); } - ctx->u.aes.tfm_ablkcipher = fallback_tfm; + ctx->u.aes.tfm_skcipher = fallback_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + - fallback_tfm->base.crt_ablkcipher.reqsize; + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); return 0; } @@ -217,9 +216,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->u.aes.tfm_ablkcipher) - crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); - ctx->u.aes.tfm_ablkcipher = NULL; + crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } static int ccp_register_aes_xts_alg(struct list_head *head, diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index a326ec20bfa87..8335b32e815e4 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -69,7 +68,7 @@ static inline struct ccp_crypto_ahash_alg * /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ - struct crypto_ablkcipher *tfm_ablkcipher; + struct crypto_skcipher *tfm_skcipher; /* Cipher used to generate CMAC K1/K2 keys */ struct crypto_cipher *tfm_cipher; From 29406bb9239fcf235357b95ae670841e6589b453 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:02 +0800 Subject: [PATCH 114/180] crypto: mxs-dcp - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 47 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 59ed54e464a96..625ee50fd78bf 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -11,7 +11,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -#include #include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -84,7 +84,7 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; @@ -374,20 +374,22 @@ static int dcp_chan_thread_aes(void *data) static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); - struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(req)); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); int ret; - ablkcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); if (enc) - ret = crypto_ablkcipher_encrypt(req); + ret = crypto_skcipher_encrypt(subreq); else - ret = crypto_ablkcipher_decrypt(req); + ret = crypto_skcipher_decrypt(subreq); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + skcipher_request_zero(subreq); return ret; } @@ -453,28 +455,22 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - /* Check if the key size is supported by kernel at all. */ - if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) { - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* * If the requested AES key size is not supported by the hardware, * but is supported by in-kernel software implementation, we use * software fallback. */ - actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - actx->fallback->base.crt_flags |= - tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(actx->fallback, + tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(actx->fallback, key, len); + ret = crypto_skcipher_setkey(actx->fallback, key, len); if (!ret) return 0; tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= - actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -484,9 +480,9 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *blk; + struct crypto_skcipher *blk; - blk = crypto_alloc_ablkcipher(name, 0, flags); + blk = crypto_alloc_skcipher(name, 0, flags); if (IS_ERR(blk)) return PTR_ERR(blk); @@ -499,8 +495,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) { struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(actx->fallback); - actx->fallback = NULL; + crypto_free_skcipher(actx->fallback); } /* From 1eb60ff82d822299b8b4fd685d1f85de87706c00 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:03 +0800 Subject: [PATCH 115/180] crypto: picoxcell - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/picoxcell_crypto.c | 60 ++++++++++++++++--------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 3b1c7ecf078fe..47576098831f9 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -171,7 +171,7 @@ struct spacc_ablk_ctx { * The fallback cipher. If the operation can't be done in hardware, * fallback to a software version. */ - struct crypto_ablkcipher *sw_cipher; + struct crypto_skcipher *sw_cipher; }; /* AEAD cipher context. */ @@ -789,33 +789,35 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, * request for any other size (192 bits) then we need to do a software * fallback. */ - if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - ctx->sw_cipher) { + if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) { + if (!ctx->sw_cipher) + return -EINVAL; + /* * Set the fallback transform to use the same request flags as * the hardware transform. */ - ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->sw_cipher->base.crt_flags |= - cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(ctx->sw_cipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->sw_cipher, + cipher->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + crypto_skcipher_get_flags(ctx->sw_cipher) & + CRYPTO_TFM_RES_MASK; - err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len); if (err) goto sw_setkey_failed; - } else if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - !ctx->sw_cipher) - err = -EINVAL; + } memcpy(ctx->key, key, len); ctx->key_len = len; sw_setkey_failed: - if (err && ctx->sw_cipher) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= - ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK; - } - return err; } @@ -910,20 +912,21 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, struct crypto_tfm *old_tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); int err; - if (!ctx->sw_cipher) - return -EINVAL; - /* * Change the request to use the software fallback transform, and once * the ciphering has completed, put the old transform back into the * request. */ - ablkcipher_request_set_tfm(req, ctx->sw_cipher); - err = is_encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm)); + skcipher_request_set_tfm(subreq, ctx->sw_cipher); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = is_encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -1015,12 +1018,13 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm) ctx->generic.flags = spacc_alg->type; ctx->generic.engine = engine; if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_ablkcipher(alg->cra_name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->sw_cipher = crypto_alloc_skcipher( + alg->cra_name, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->sw_cipher)) { dev_warn(engine->dev, "failed to allocate fallback for %s\n", alg->cra_name); - ctx->sw_cipher = NULL; + return PTR_ERR(ctx->sw_cipher); } } ctx->generic.key_offs = spacc_alg->key_offs; @@ -1035,9 +1039,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm) { struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sw_cipher) - crypto_free_ablkcipher(ctx->sw_cipher); - ctx->sw_cipher = NULL; + crypto_free_skcipher(ctx->sw_cipher); } static int spacc_ablk_encrypt(struct ablkcipher_request *req) From 2d20ce070d3b78f0974408ef648223967d0efb0a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:04 +0800 Subject: [PATCH 116/180] crypto: qce - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/qce/ablkcipher.c | 27 ++++++++++++++++----------- drivers/crypto/qce/cipher.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index dbcbbe242bd67..b04b42f48366d 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -15,8 +15,8 @@ #include #include #include -#include #include +#include #include "cipher.h" @@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, memcpy(ctx->enc_key, key, keylen); return 0; fallback: - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; return ret; @@ -212,10 +212,16 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt) if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && ctx->enc_keylen != AES_KEYSIZE_256) { - ablkcipher_request_set_tfm(req, ctx->fallback); - ret = encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -239,10 +245,9 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm) memset(ctx, 0, sizeof(*ctx)); tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx); - ctx->fallback = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), - CRYPTO_ALG_TYPE_ABLKCIPHER, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); @@ -253,7 +258,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm) { struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); } struct qce_ablkcipher_def { diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h index 5c6a5f8633e5d..2b0278bb6e923 100644 --- a/drivers/crypto/qce/cipher.h +++ b/drivers/crypto/qce/cipher.h @@ -22,7 +22,7 @@ struct qce_cipher_ctx { u8 enc_key[QCE_MAX_KEY_SIZE]; unsigned int enc_keylen; - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; }; /** From 678adecd117f953cd21088064d95ceffac79a2a6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:05 +0800 Subject: [PATCH 117/180] crypto: sahara - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. It also removes shash_fallback which is totally unused. Signed-off-by: Herbert Xu --- drivers/crypto/sahara.c | 112 ++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 62 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index c3f3d89e4831c..0c49956ee0cea 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -14,10 +14,9 @@ * Based on omap-aes.c and tegra-aes.c */ -#include #include -#include #include +#include #include #include @@ -150,10 +149,7 @@ struct sahara_ctx { /* AES-specific context */ int keylen; u8 key[AES_KEYSIZE_128]; - struct crypto_ablkcipher *fallback; - - /* SHA-specific context */ - struct crypto_shash *shash_fallback; + struct crypto_skcipher *fallback; }; struct sahara_aes_reqctx { @@ -620,25 +616,21 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - if (keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) + if (keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) return -EINVAL; /* * The requested key size is not supported by HW, do a fallback. */ - ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->fallback->base.crt_flags |= - (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); - if (ret) { - struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); - tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm_aux->crt_flags |= - (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); - } + tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -670,16 +662,20 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -688,16 +684,20 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -706,16 +706,20 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -724,16 +728,20 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -745,8 +753,9 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->fallback = crypto_alloc_ablkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) { pr_err("Error allocating fallback algo %s\n", name); return PTR_ERR(ctx->fallback); @@ -761,9 +770,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm) { struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) - crypto_free_ablkcipher(ctx->fallback); - ctx->fallback = NULL; + crypto_free_skcipher(ctx->fallback); } static u32 sahara_sha_init_hdr(struct sahara_dev *dev, @@ -1180,15 +1187,6 @@ static int sahara_sha_import(struct ahash_request *req, const void *in) static int sahara_sha_cra_init(struct crypto_tfm *tfm) { - const char *name = crypto_tfm_alg_name(tfm); - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - ctx->shash_fallback = crypto_alloc_shash(name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->shash_fallback)) { - pr_err("Error allocating fallback algo %s\n", name); - return PTR_ERR(ctx->shash_fallback); - } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct sahara_sha_reqctx) + SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); @@ -1196,14 +1194,6 @@ static int sahara_sha_cra_init(struct crypto_tfm *tfm) return 0; } -static void sahara_sha_cra_exit(struct crypto_tfm *tfm) -{ - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - crypto_free_shash(ctx->shash_fallback); - ctx->shash_fallback = NULL; -} - static struct crypto_alg aes_algs[] = { { .cra_name = "ecb(aes)", @@ -1272,7 +1262,6 @@ static struct ahash_alg sha_v3_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; @@ -1300,7 +1289,6 @@ static struct ahash_alg sha_v4_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; From 64e26807bb93b4accaa395d98e118ab893cac074 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:07 +0800 Subject: [PATCH 118/180] crypto: s390/aes - Use skcipher for fallback This patch replaces use of the obsolete blkcipher with skcipher. Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 113 +++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 53 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 7554a8bb2adc5..2ea18b050309a 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -44,7 +45,7 @@ struct s390_aes_ctx { long dec; int key_len; union { - struct crypto_blkcipher *blk; + struct crypto_skcipher *blk; struct crypto_cipher *cip; } fallback; }; @@ -63,7 +64,7 @@ struct s390_xts_ctx { long enc; long dec; int key_len; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; }; /* @@ -237,16 +238,16 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); unsigned int ret; - sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) & + CRYPTO_TFM_RES_MASK; - ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len); - if (ret) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags & - CRYPTO_TFM_RES_MASK); - } return ret; } @@ -255,15 +256,17 @@ static int fallback_blk_dec(struct blkcipher_desc *desc, unsigned int nbytes) { unsigned int ret; - struct crypto_blkcipher *tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - tfm = desc->tfm; - desc->tfm = sctx->fallback.blk; + skcipher_request_set_tfm(req, sctx->fallback.blk); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + ret = crypto_skcipher_decrypt(req); - desc->tfm = tfm; + skcipher_request_zero(req); return ret; } @@ -272,15 +275,15 @@ static int fallback_blk_enc(struct blkcipher_desc *desc, unsigned int nbytes) { unsigned int ret; - struct crypto_blkcipher *tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - tfm = desc->tfm; - desc->tfm = sctx->fallback.blk; + skcipher_request_set_tfm(req, sctx->fallback.blk); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); - - desc->tfm = tfm; + ret = crypto_skcipher_encrypt(req); return ret; } @@ -370,8 +373,9 @@ static int fallback_init_blk(struct crypto_tfm *tfm) const char *name = tfm->__crt_alg->cra_name; struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - sctx->fallback.blk = crypto_alloc_blkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + sctx->fallback.blk = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(sctx->fallback.blk)) { pr_err("Allocating AES fallback algorithm %s failed\n", @@ -386,8 +390,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(sctx->fallback.blk); - sctx->fallback.blk = NULL; + crypto_free_skcipher(sctx->fallback.blk); } static struct crypto_alg ecb_aes_alg = { @@ -536,16 +539,16 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); unsigned int ret; - xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) & + CRYPTO_TFM_RES_MASK; - ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); - if (ret) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & - CRYPTO_TFM_RES_MASK); - } return ret; } @@ -553,16 +556,18 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct crypto_blkcipher *tfm; + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); unsigned int ret; - tfm = desc->tfm; - desc->tfm = xts_ctx->fallback; + skcipher_request_set_tfm(req, xts_ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + ret = crypto_skcipher_decrypt(req); - desc->tfm = tfm; + skcipher_request_zero(req); return ret; } @@ -570,16 +575,18 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct crypto_blkcipher *tfm; + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); unsigned int ret; - tfm = desc->tfm; - desc->tfm = xts_ctx->fallback; + skcipher_request_set_tfm(req, xts_ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + ret = crypto_skcipher_encrypt(req); - desc->tfm = tfm; + skcipher_request_zero(req); return ret; } @@ -700,8 +707,9 @@ static int xts_fallback_init(struct crypto_tfm *tfm) const char *name = tfm->__crt_alg->cra_name; struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + xts_ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(xts_ctx->fallback)) { pr_err("Allocating XTS fallback algorithm %s failed\n", @@ -715,8 +723,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm) { struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(xts_ctx->fallback); - xts_ctx->fallback = NULL; + crypto_free_skcipher(xts_ctx->fallback); } static struct crypto_alg xts_aes_alg = { From 32f27c745c26ff4b6351bce265cba049a2c74de5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:13 +0800 Subject: [PATCH 119/180] crypto: api - Add crypto_inst_setname This patch adds the helper crypto_inst_setname because the current helper crypto_alloc_instance2 is no longer useful given that we now look up the algorithm after we allocate the instance object. Signed-off-by: Herbert Xu --- crypto/algapi.c | 24 +++++++++++++++++------- include/crypto/algapi.h | 2 ++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 731255a6104f7..df939b54b09f7 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -811,6 +811,21 @@ int crypto_attr_u32(struct rtattr *rta, u32 *num) } EXPORT_SYMBOL_GPL(crypto_attr_u32); +int crypto_inst_setname(struct crypto_instance *inst, const char *name, + struct crypto_alg *alg) +{ + if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name, + alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_inst_setname); + void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, unsigned int head) { @@ -825,13 +840,8 @@ void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, inst = (void *)(p + head); - err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name, - alg->cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_free_inst; - - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", - name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + err = crypto_inst_setname(inst, name, alg); + if (err) goto err_free_inst; return p; diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index eeafd21afb446..0483f652ac279 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -244,6 +244,8 @@ static inline struct crypto_alg *crypto_attr_alg(struct rtattr *rta, } int crypto_attr_u32(struct rtattr *rta, u32 *num); +int crypto_inst_setname(struct crypto_instance *inst, const char *name, + struct crypto_alg *alg); void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, unsigned int head); struct crypto_instance *crypto_alloc_instance(const char *name, From 1503a24f53f153f8c88be9810bc73efaf6853e1c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:14 +0800 Subject: [PATCH 120/180] crypto: tcrypt - Add speed test for cts This patch adds speed tests for cts(cbc(aes)). Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 68064fc9c8ee3..11aedae02382d 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1389,6 +1389,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) speed_template_32_48_64); test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0, speed_template_32_48_64); + test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); test_cipher_speed("ctr(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); test_cipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, @@ -1818,6 +1822,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) speed_template_32_48_64); test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0, speed_template_32_48_64); + test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); test_acipher_speed("ctr(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); test_acipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, From 50d2b643ea6675927435743633a57c2a9cfd8d83 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:20 +0800 Subject: [PATCH 121/180] crypto: testmgr - Allow leading zeros in RSA This patch allows RSA implementations to produce output with leading zeroes. testmgr will skip leading zeroes when comparing the output. This patch also tries to make the RSA test function generic enough to potentially handle other akcipher algorithms. Signed-off-by: Herbert Xu --- crypto/testmgr.c | 51 +++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 537fdc380a7b8..38e23be315a0b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1911,8 +1911,8 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver, return err; } -static int do_test_rsa(struct crypto_akcipher *tfm, - struct akcipher_testvec *vecs) +static int test_akcipher_one(struct crypto_akcipher *tfm, + struct akcipher_testvec *vecs) { char *xbuf[XBUFSIZE]; struct akcipher_request *req; @@ -1963,17 +1963,18 @@ static int do_test_rsa(struct crypto_akcipher *tfm, /* Run RSA encrypt - c = m^e mod n;*/ err = wait_async_op(&result, crypto_akcipher_encrypt(req)); if (err) { - pr_err("alg: rsa: encrypt test failed. err %d\n", err); + pr_err("alg: akcipher: encrypt test failed. err %d\n", err); goto free_all; } if (req->dst_len != vecs->c_size) { - pr_err("alg: rsa: encrypt test failed. Invalid output len\n"); + pr_err("alg: akcipher: encrypt test failed. Invalid output len\n"); err = -EINVAL; goto free_all; } /* verify that encrypted message is equal to expected */ if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) { - pr_err("alg: rsa: encrypt test failed. Invalid output\n"); + pr_err("alg: akcipher: encrypt test failed. Invalid output\n"); + hexdump(outbuf_enc, vecs->c_size); err = -EINVAL; goto free_all; } @@ -2001,18 +2002,22 @@ static int do_test_rsa(struct crypto_akcipher *tfm, /* Run RSA decrypt - m = c^d mod n;*/ err = wait_async_op(&result, crypto_akcipher_decrypt(req)); if (err) { - pr_err("alg: rsa: decrypt test failed. err %d\n", err); + pr_err("alg: akcipher: decrypt test failed. err %d\n", err); goto free_all; } out_len = req->dst_len; - if (out_len != vecs->m_size) { - pr_err("alg: rsa: decrypt test failed. Invalid output len\n"); + if (out_len < vecs->m_size) { + pr_err("alg: akcipher: decrypt test failed. " + "Invalid output len %u\n", out_len); err = -EINVAL; goto free_all; } /* verify that decrypted message is equal to the original msg */ - if (memcmp(vecs->m, outbuf_dec, vecs->m_size)) { - pr_err("alg: rsa: decrypt test failed. Invalid output\n"); + if (memchr_inv(outbuf_dec, 0, out_len - vecs->m_size) || + memcmp(vecs->m, outbuf_dec + out_len - vecs->m_size, + vecs->m_size)) { + pr_err("alg: akcipher: decrypt test failed. Invalid output\n"); + hexdump(outbuf_dec, out_len); err = -EINVAL; } free_all: @@ -2025,28 +2030,20 @@ static int do_test_rsa(struct crypto_akcipher *tfm, return err; } -static int test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs, - unsigned int tcount) +static int test_akcipher(struct crypto_akcipher *tfm, const char *alg, + struct akcipher_testvec *vecs, unsigned int tcount) { int ret, i; for (i = 0; i < tcount; i++) { - ret = do_test_rsa(tfm, vecs++); - if (ret) { - pr_err("alg: rsa: test failed on vector %d, err=%d\n", - i + 1, ret); - return ret; - } - } - return 0; -} - -static int test_akcipher(struct crypto_akcipher *tfm, const char *alg, - struct akcipher_testvec *vecs, unsigned int tcount) -{ - if (strncmp(alg, "rsa", 3) == 0) - return test_rsa(tfm, vecs, tcount); + ret = test_akcipher_one(tfm, vecs++); + if (!ret) + continue; + pr_err("alg: akcipher: test failed on vector %d, err=%d\n", + i + 1, ret); + return ret; + } return 0; } From 9b45b7bba3d22de52e09df63c50f390a193a3f53 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:21 +0800 Subject: [PATCH 122/180] crypto: rsa - Generate fixed-length output Every implementation of RSA that we have naturally generates output with leading zeroes. The one and only user of RSA, pkcs1pad wants to have those leading zeroes in place, in fact because they are currently absent it has to write those zeroes itself. So we shouldn't be stripping leading zeroes in the first place. In fact this patch makes rsa-generic produce output with fixed length so that pkcs1pad does not need to do any extra work. This patch also changes DH to use the new interface. Signed-off-by: Herbert Xu --- crypto/dh.c | 2 +- crypto/rsa.c | 8 +++---- include/linux/mpi.h | 2 +- lib/mpi/mpicoder.c | 55 +++++++++++++++++++++------------------------ 4 files changed, 32 insertions(+), 35 deletions(-) diff --git a/crypto/dh.c b/crypto/dh.c index 5e960fe28681d..9d19360e71897 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -129,7 +129,7 @@ static int dh_compute_value(struct kpp_request *req) if (ret) goto err_free_base; - ret = mpi_write_to_sgl(val, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(val, req->dst, req->dst_len, &sign); if (ret) goto err_free_base; diff --git a/crypto/rsa.c b/crypto/rsa.c index dc692d43b6662..4c280b6a3ea9c 100644 --- a/crypto/rsa.c +++ b/crypto/rsa.c @@ -108,7 +108,7 @@ static int rsa_enc(struct akcipher_request *req) if (ret) goto err_free_m; - ret = mpi_write_to_sgl(c, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(c, req->dst, req->dst_len, &sign); if (ret) goto err_free_m; @@ -147,7 +147,7 @@ static int rsa_dec(struct akcipher_request *req) if (ret) goto err_free_c; - ret = mpi_write_to_sgl(m, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(m, req->dst, req->dst_len, &sign); if (ret) goto err_free_c; @@ -185,7 +185,7 @@ static int rsa_sign(struct akcipher_request *req) if (ret) goto err_free_m; - ret = mpi_write_to_sgl(s, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(s, req->dst, req->dst_len, &sign); if (ret) goto err_free_m; @@ -226,7 +226,7 @@ static int rsa_verify(struct akcipher_request *req) if (ret) goto err_free_s; - ret = mpi_write_to_sgl(m, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(m, req->dst, req->dst_len, &sign); if (ret) goto err_free_s; diff --git a/include/linux/mpi.h b/include/linux/mpi.h index f219559e5e800..1cc5ffb769afd 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -80,7 +80,7 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign); -int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned *nbytes, +int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); #define log_mpidump g10_log_mpidump diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 823cf5f5196b3..7150e5c23604d 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -237,16 +237,13 @@ EXPORT_SYMBOL_GPL(mpi_get_buffer); * @a: a multi precision integer * @sgl: scatterlist to write to. Needs to be at least * mpi_get_size(a) long. - * @nbytes: in/out param - it has the be set to the maximum number of - * bytes that can be written to sgl. This has to be at least - * the size of the integer a. On return it receives the actual - * length of the data written on success or the data that would - * be written if buffer was too small. + * @nbytes: the number of bytes to write. Leading bytes will be + * filled with zero. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ -int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, +int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, int *sign) { u8 *p, *p2; @@ -258,43 +255,44 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); - int i, x, y = 0, lzeros, buf_len; - - if (!nbytes) - return -EINVAL; + int i, x, buf_len; if (sign) *sign = a->sign; - lzeros = count_lzeros(a); - - if (*nbytes < n - lzeros) { - *nbytes = n - lzeros; + if (nbytes < n) return -EOVERFLOW; - } - *nbytes = n - lzeros; buf_len = sgl->length; p2 = sg_virt(sgl); - for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB, - lzeros %= BYTES_PER_MPI_LIMB; - i >= 0; i--) { + while (nbytes > n) { + if (!buf_len) { + sgl = sg_next(sgl); + if (!sgl) + return -EINVAL; + buf_len = sgl->length; + p2 = sg_virt(sgl); + } + + i = min_t(unsigned, nbytes - n, buf_len); + memset(p2, 0, i); + p2 += i; + buf_len -= i; + nbytes -= i; + } + + for (i = a->nlimbs - 1; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 - alimb = cpu_to_be32(a->d[i]); + alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0; #elif BYTES_PER_MPI_LIMB == 8 - alimb = cpu_to_be64(a->d[i]); + alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0; #else #error please implement for this limb size. #endif - if (lzeros) { - y = lzeros; - lzeros = 0; - } - - p = (u8 *)&alimb + y; + p = (u8 *)&alimb; - for (x = 0; x < sizeof(alimb) - y; x++) { + for (x = 0; x < sizeof(alimb); x++) { if (!buf_len) { sgl = sg_next(sgl); if (!sgl) @@ -305,7 +303,6 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, *p2++ = *p++; buf_len--; } - y = 0; } return 0; } From 127827b9c295db35fa7e49d00ac5d14faeda9461 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:22 +0800 Subject: [PATCH 123/180] lib/mpi: Do not do sg_virt Currently the mpi SG helpers use sg_virt which is completely broken. It happens to work with normal kernel memory but will fail with anything that is not linearly mapped. This patch fixes this by using the SG iterator helpers. Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 86 +++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 7150e5c23604d..c6272ae2015e7 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "mpi-internal.h" @@ -255,7 +256,9 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); + struct sg_mapping_iter miter; int i, x, buf_len; + int nents; if (sign) *sign = a->sign; @@ -263,23 +266,27 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, if (nbytes < n) return -EOVERFLOW; - buf_len = sgl->length; - p2 = sg_virt(sgl); + nents = sg_nents_for_len(sgl, nbytes); + if (nents < 0) + return -EINVAL; - while (nbytes > n) { - if (!buf_len) { - sgl = sg_next(sgl); - if (!sgl) - return -EINVAL; - buf_len = sgl->length; - p2 = sg_virt(sgl); - } + sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); + sg_miter_next(&miter); + buf_len = miter.length; + p2 = miter.addr; + while (nbytes > n) { i = min_t(unsigned, nbytes - n, buf_len); memset(p2, 0, i); p2 += i; - buf_len -= i; nbytes -= i; + + buf_len -= i; + if (!buf_len) { + sg_miter_next(&miter); + buf_len = miter.length; + p2 = miter.addr; + } } for (i = a->nlimbs - 1; i >= 0; i--) { @@ -293,17 +300,16 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, p = (u8 *)&alimb; for (x = 0; x < sizeof(alimb); x++) { - if (!buf_len) { - sgl = sg_next(sgl); - if (!sgl) - return -EINVAL; - buf_len = sgl->length; - p2 = sg_virt(sgl); - } *p2++ = *p++; - buf_len--; + if (!--buf_len) { + sg_miter_next(&miter); + buf_len = miter.length; + p2 = miter.addr; + } } } + + sg_miter_stop(&miter); return 0; } EXPORT_SYMBOL_GPL(mpi_write_to_sgl); @@ -323,19 +329,23 @@ EXPORT_SYMBOL_GPL(mpi_write_to_sgl); */ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) { - struct scatterlist *sg; - int x, i, j, z, lzeros, ents; + struct sg_mapping_iter miter; unsigned int nbits, nlimbs; + int x, j, z, lzeros, ents; + unsigned int len; + const u8 *buff; mpi_limb_t a; MPI val = NULL; - lzeros = 0; - ents = sg_nents(sgl); + ents = sg_nents_for_len(sgl, nbytes); + if (ents < 0) + return NULL; - for_each_sg(sgl, sg, ents, i) { - const u8 *buff = sg_virt(sg); - int len = sg->length; + sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); + lzeros = 0; + len = 0; + while (nbytes > 0) { while (len && !*buff) { lzeros++; len--; @@ -345,12 +355,14 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) if (len && *buff) break; - ents--; + sg_miter_next(&miter); + buff = miter.addr; + len = miter.length; + nbytes -= lzeros; lzeros = 0; } - sgl = sg; nbytes -= lzeros; nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { @@ -359,8 +371,7 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) } if (nbytes > 0) - nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) - - (BITS_PER_LONG - 8); + nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); @@ -379,21 +390,24 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; z %= BYTES_PER_MPI_LIMB; - for_each_sg(sgl, sg, ents, i) { - const u8 *buffer = sg_virt(sg) + lzeros; - int len = sg->length - lzeros; - + for (;;) { for (x = 0; x < len; x++) { a <<= 8; - a |= *buffer++; + a |= *buff++; if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { val->d[j--] = a; a = 0; } } z += x; - lzeros = 0; + + if (!sg_miter_next(&miter)) + break; + + buff = miter.addr; + len = miter.length; } + return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); From c0d20d22e0ad20718702ae98cf8b5c200271d6df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:23 +0800 Subject: [PATCH 124/180] crypto: rsa-pkcs1pad - Require hash to be present The only user of rsa-pkcs1pad always uses the hash so there is no reason to support the case of not having a hash. This patch also changes the digest info lookup so that it is only done once during template instantiation rather than on each operation. Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 83 ++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 53 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index ead8dc0d084e7..5c1c78e21f841 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -92,13 +92,12 @@ static const struct rsa_asn1_template *rsa_lookup_asn1(const char *name) struct pkcs1pad_ctx { struct crypto_akcipher *child; - const char *hash_name; unsigned int key_size; }; struct pkcs1pad_inst_ctx { struct crypto_akcipher_spawn spawn; - const char *hash_name; + const struct rsa_asn1_template *digest_info; }; struct pkcs1pad_request { @@ -416,20 +415,16 @@ static int pkcs1pad_sign(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - const struct rsa_asn1_template *digest_info = NULL; + struct akcipher_instance *inst = akcipher_alg_instance(tfm); + struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); + const struct rsa_asn1_template *digest_info = ictx->digest_info; int err; unsigned int ps_end, digest_size = 0; if (!ctx->key_size) return -EINVAL; - if (ctx->hash_name) { - digest_info = rsa_lookup_asn1(ctx->hash_name); - if (!digest_info) - return -EINVAL; - - digest_size = digest_info->size; - } + digest_size = digest_info->size; if (req->src_len + digest_size > ctx->key_size - 11) return -EOVERFLOW; @@ -462,10 +457,8 @@ static int pkcs1pad_sign(struct akcipher_request *req) memset(req_ctx->in_buf + 1, 0xff, ps_end - 1); req_ctx->in_buf[ps_end] = 0x00; - if (digest_info) { - memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data, - digest_info->size); - } + memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data, + digest_info->size); pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); @@ -499,7 +492,9 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - const struct rsa_asn1_template *digest_info; + struct akcipher_instance *inst = akcipher_alg_instance(tfm); + struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); + const struct rsa_asn1_template *digest_info = ictx->digest_info; unsigned int pos; if (err == -EOVERFLOW) @@ -527,17 +522,11 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) goto done; pos++; - if (ctx->hash_name) { - digest_info = rsa_lookup_asn1(ctx->hash_name); - if (!digest_info) - goto done; + if (memcmp(req_ctx->out_buf + pos, digest_info->data, + digest_info->size)) + goto done; - if (memcmp(req_ctx->out_buf + pos, digest_info->data, - digest_info->size)) - goto done; - - pos += digest_info->size; - } + pos += digest_info->size; err = 0; @@ -626,12 +615,11 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm) struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct crypto_akcipher *child_tfm; - child_tfm = crypto_spawn_akcipher(akcipher_instance_ctx(inst)); + child_tfm = crypto_spawn_akcipher(&ictx->spawn); if (IS_ERR(child_tfm)) return PTR_ERR(child_tfm); ctx->child = child_tfm; - ctx->hash_name = ictx->hash_name; return 0; } @@ -648,12 +636,12 @@ static void pkcs1pad_free(struct akcipher_instance *inst) struct crypto_akcipher_spawn *spawn = &ctx->spawn; crypto_drop_akcipher(spawn); - kfree(ctx->hash_name); kfree(inst); } static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) { + const struct rsa_asn1_template *digest_info; struct crypto_attr_type *algt; struct akcipher_instance *inst; struct pkcs1pad_inst_ctx *ctx; @@ -676,7 +664,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) hash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(hash_name)) - hash_name = NULL; + return PTR_ERR(hash_name); + + digest_info = rsa_lookup_asn1(hash_name); + if (!digest_info) + return -EINVAL; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) @@ -684,7 +676,7 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) ctx = akcipher_instance_ctx(inst); spawn = &ctx->spawn; - ctx->hash_name = hash_name ? kstrdup(hash_name, GFP_KERNEL) : NULL; + ctx->digest_info = digest_info; crypto_set_spawn(&spawn->base, akcipher_crypto_instance(inst)); err = crypto_grab_akcipher(spawn, rsa_alg_name, 0, @@ -696,27 +688,14 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) err = -ENAMETOOLONG; - if (!hash_name) { - if (snprintf(inst->alg.base.cra_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", - rsa_alg->base.cra_name) >= - CRYPTO_MAX_ALG_NAME || - snprintf(inst->alg.base.cra_driver_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", - rsa_alg->base.cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >= + CRYPTO_MAX_ALG_NAME || + snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", + rsa_alg->base.cra_driver_name, hash_name) >= + CRYPTO_MAX_ALG_NAME) goto out_drop_alg; - } else { - if (snprintf(inst->alg.base.cra_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", - rsa_alg->base.cra_name, hash_name) >= - CRYPTO_MAX_ALG_NAME || - snprintf(inst->alg.base.cra_driver_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", - rsa_alg->base.cra_driver_name, hash_name) >= - CRYPTO_MAX_ALG_NAME) - goto out_free_hash; - } inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = rsa_alg->base.cra_priority; @@ -738,12 +717,10 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) err = akcipher_register_instance(tmpl, inst); if (err) - goto out_free_hash; + goto out_drop_alg; return 0; -out_free_hash: - kfree(ctx->hash_name); out_drop_alg: crypto_drop_akcipher(spawn); out_free_inst: From 0f2c83190bc6756021e35a91c1f282ecae3f0e87 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:24 +0800 Subject: [PATCH 125/180] crypto: rsa-pkcs1pad - Remove bogus page splitting The helper pkcs1pad_sg_set_buf tries to split a buffer that crosses a page boundary into two SG entries. This is unnecessary. This patch removes that. Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 5c1c78e21f841..d9baefb7d5d1a 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -103,7 +103,7 @@ struct pkcs1pad_inst_ctx { struct pkcs1pad_request { struct akcipher_request child_req; - struct scatterlist in_sg[3], out_sg[2]; + struct scatterlist in_sg[2], out_sg[1]; uint8_t *in_buf, *out_buf; }; @@ -163,19 +163,10 @@ static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm) static void pkcs1pad_sg_set_buf(struct scatterlist *sg, void *buf, size_t len, struct scatterlist *next) { - int nsegs = next ? 1 : 0; - - if (offset_in_page(buf) + len <= PAGE_SIZE) { - nsegs += 1; - sg_init_table(sg, nsegs); - sg_set_buf(sg, buf, len); - } else { - nsegs += 2; - sg_init_table(sg, nsegs); - sg_set_buf(sg + 0, buf, PAGE_SIZE - offset_in_page(buf)); - sg_set_buf(sg + 1, buf + PAGE_SIZE - offset_in_page(buf), - offset_in_page(buf) + len - PAGE_SIZE); - } + int nsegs = next ? 2 : 1; + + sg_init_table(sg, nsegs); + sg_set_buf(sg, buf, len); if (next) sg_chain(sg, nsegs, next); From 3a32ce507a942962f81cae8abec9ef0d0647d127 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:26 +0800 Subject: [PATCH 126/180] crypto: rsa-pkcs1pad - Always use GFP_KERNEL We don't currently support using akcipher in atomic contexts, so GFP_KERNEL should always be used. Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index d9baefb7d5d1a..db19284f2e2be 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -260,8 +260,7 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) req_ctx->child_req.dst_len = ctx->key_size; req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + GFP_KERNEL); if (!req_ctx->in_buf) return -ENOMEM; @@ -274,9 +273,7 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) { kfree(req_ctx->in_buf); return -ENOMEM; @@ -379,9 +376,7 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) req_ctx->child_req.dst = req_ctx->out_sg; req_ctx->child_req.dst_len = ctx->key_size ; - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; @@ -438,8 +433,7 @@ static int pkcs1pad_sign(struct akcipher_request *req) req_ctx->child_req.dst_len = ctx->key_size; req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + GFP_KERNEL); if (!req_ctx->in_buf) return -ENOMEM; @@ -454,9 +448,7 @@ static int pkcs1pad_sign(struct akcipher_request *req) pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) { kfree(req_ctx->in_buf); return -ENOMEM; @@ -577,9 +569,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) req_ctx->child_req.dst = req_ctx->out_sg; req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; From 73f79189603b3cd8af1ba1ad2d71f1b3f0aa796d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:27 +0800 Subject: [PATCH 127/180] crypto: rsa-pkcs1pad - Move key size check to setkey Rather than repeatedly checking the key size on each operation, we should be checking it once when the key is set. Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 56 ++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index db19284f2e2be..ebd851474a9db 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -111,40 +111,48 @@ static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - int err, size; + int err; + + ctx->key_size = 0; err = crypto_akcipher_set_pub_key(ctx->child, key, keylen); + if (err) + return err; - if (!err) { - /* Find out new modulus size from rsa implementation */ - size = crypto_akcipher_maxsize(ctx->child); + /* Find out new modulus size from rsa implementation */ + err = crypto_akcipher_maxsize(ctx->child); + if (err < 0) + return err; - ctx->key_size = size > 0 ? size : 0; - if (size <= 0) - err = size; - } + if (err > PAGE_SIZE) + return -ENOTSUPP; - return err; + ctx->key_size = err; + return 0; } static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - int err, size; + int err; + + ctx->key_size = 0; err = crypto_akcipher_set_priv_key(ctx->child, key, keylen); + if (err) + return err; - if (!err) { - /* Find out new modulus size from rsa implementation */ - size = crypto_akcipher_maxsize(ctx->child); + /* Find out new modulus size from rsa implementation */ + err = crypto_akcipher_maxsize(ctx->child); + if (err < 0) + return err; - ctx->key_size = size > 0 ? size : 0; - if (size <= 0) - err = size; - } + if (err > PAGE_SIZE) + return -ENOTSUPP; - return err; + ctx->key_size = err; + return 0; } static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm) @@ -247,9 +255,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) return -EOVERFLOW; } - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - /* * Replace both input and output to add the padding in the input and * the potential missing leading zeros in the output. @@ -367,9 +372,6 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) if (!ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - /* Reuse input buffer, output to a new buffer */ req_ctx->child_req.src = req->src; req_ctx->child_req.src_len = req->src_len; @@ -420,9 +422,6 @@ static int pkcs1pad_sign(struct akcipher_request *req) return -EOVERFLOW; } - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - /* * Replace both input and output to add the padding in the input and * the potential missing leading zeros in the output. @@ -560,9 +559,6 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (!ctx->key_size || req->src_len < ctx->key_size) return -EINVAL; - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - /* Reuse input buffer, output to a new buffer */ req_ctx->child_req.src = req->src; req_ctx->child_req.src_len = req->src_len; From d858b0713849be51406fe84722d0877fb57d201c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 19:32:28 +0800 Subject: [PATCH 128/180] crypto: rsa-pkcs1pad - Avoid copying output when possible In the vast majority of cases (2^-32 on 32-bit and 2^-64 on 64-bit) cases, the result from encryption/signing will require no padding. This patch makes these two operations write their output directly to the final destination. Only in the exceedingly rare cases where fixup is needed to we copy it out and back to add the leading zeroes. This patch also makes use of the crypto_akcipher_set_crypt API instead of writing the akcipher request directly. Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 112 +++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 67 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index ebd851474a9db..8ccfdd7c926eb 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -185,37 +185,36 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - size_t pad_len = ctx->key_size - req_ctx->child_req.dst_len; - size_t chunk_len, pad_left; - struct sg_mapping_iter miter; - - if (!err) { - if (pad_len) { - sg_miter_start(&miter, req->dst, - sg_nents_for_len(req->dst, pad_len), - SG_MITER_ATOMIC | SG_MITER_TO_SG); - - pad_left = pad_len; - while (pad_left) { - sg_miter_next(&miter); - - chunk_len = min(miter.length, pad_left); - memset(miter.addr, 0, chunk_len); - pad_left -= chunk_len; - } - - sg_miter_stop(&miter); - } - - sg_pcopy_from_buffer(req->dst, - sg_nents_for_len(req->dst, ctx->key_size), - req_ctx->out_buf, req_ctx->child_req.dst_len, - pad_len); - } + unsigned int pad_len; + unsigned int len; + u8 *out_buf; + + if (err) + goto out; + + len = req_ctx->child_req.dst_len; + pad_len = ctx->key_size - len; + + /* Four billion to one */ + if (likely(!pad_len)) + goto out; + + out_buf = kzalloc(ctx->key_size, GFP_ATOMIC); + err = -ENOMEM; + if (!out_buf) + goto out; + + sg_copy_to_buffer(req->dst, sg_nents_for_len(req->dst, len), + out_buf + pad_len, len); + sg_copy_from_buffer(req->dst, + sg_nents_for_len(req->dst, ctx->key_size), + out_buf, ctx->key_size); + kzfree(out_buf); + +out: req->dst_len = ctx->key_size; kfree(req_ctx->in_buf); - kzfree(req_ctx->out_buf); return err; } @@ -255,15 +254,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) return -EOVERFLOW; } - /* - * Replace both input and output to add the padding in the input and - * the potential missing leading zeros in the output. - */ - req_ctx->child_req.src = req_ctx->in_sg; - req_ctx->child_req.src_len = ctx->key_size - 1; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, GFP_KERNEL); if (!req_ctx->in_buf) @@ -291,6 +281,10 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_encrypt_sign_complete_cb, req); + /* Reuse output buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req_ctx->in_sg, + req->dst, ctx->key_size - 1, req->dst_len); + err = crypto_akcipher_encrypt(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -372,12 +366,6 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) if (!ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; - /* Reuse input buffer, output to a new buffer */ - req_ctx->child_req.src = req->src; - req_ctx->child_req.src_len = req->src_len; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size ; - req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; @@ -389,6 +377,11 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_decrypt_complete_cb, req); + /* Reuse input buffer, output to a new buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req->src, + req_ctx->out_sg, req->src_len, + ctx->key_size); + err = crypto_akcipher_decrypt(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -422,15 +415,6 @@ static int pkcs1pad_sign(struct akcipher_request *req) return -EOVERFLOW; } - /* - * Replace both input and output to add the padding in the input and - * the potential missing leading zeros in the output. - */ - req_ctx->child_req.src = req_ctx->in_sg; - req_ctx->child_req.src_len = ctx->key_size - 1; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, GFP_KERNEL); if (!req_ctx->in_buf) @@ -447,19 +431,14 @@ static int pkcs1pad_sign(struct akcipher_request *req) pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); - if (!req_ctx->out_buf) { - kfree(req_ctx->in_buf); - return -ENOMEM; - } - - pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size, NULL); - akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_encrypt_sign_complete_cb, req); + /* Reuse output buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req_ctx->in_sg, + req->dst, ctx->key_size - 1, req->dst_len); + err = crypto_akcipher_sign(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -559,12 +538,6 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (!ctx->key_size || req->src_len < ctx->key_size) return -EINVAL; - /* Reuse input buffer, output to a new buffer */ - req_ctx->child_req.src = req->src; - req_ctx->child_req.src_len = req->src_len; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; @@ -576,6 +549,11 @@ static int pkcs1pad_verify(struct akcipher_request *req) akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_verify_complete_cb, req); + /* Reuse input buffer, output to a new buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req->src, + req_ctx->out_sg, req->src_len, + ctx->key_size); + err = crypto_akcipher_verify(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || From d13cd11fbccb5239c8fb4c1e70e0ca2e811ba2c4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 30 Jun 2016 11:00:13 +0800 Subject: [PATCH 129/180] crypto: tcrypt - Do not bail on EINPROGRESS in multibuffer hash test The multibuffer hash speed test is incorrectly bailing because of an EINPROGRESS return value. This patch fixes it by setting ret to zero if it is equal to -EINPROGRESS. Reported-by: Megha Dey Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 11aedae02382d..202cfa10076c5 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -486,8 +486,10 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec, for (k = 0; k < 8; k++) { ret = crypto_ahash_digest(data[k].req); - if (ret == -EINPROGRESS) + if (ret == -EINPROGRESS) { + ret = 0; continue; + } if (ret) break; From eb3547859d73629c888825d6b928f2d0dba5af41 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Thu, 30 Jun 2016 14:04:11 -0500 Subject: [PATCH 130/180] crypto: omap-sham - increase cra_proirity to 400 The arm-neon-sha implementations have cra_priority of 150...300, so increase omap-sham priority to 400 to ensure it is on top of any software alg. Signed-off-by: Bin Liu Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index ae6f841f81ffa..7fe4eef12fe2f 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1334,7 +1334,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "sha1", .cra_driver_name = "omap-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1357,7 +1357,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "omap-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1381,7 +1381,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "omap-hmac-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1406,7 +1406,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "omap-hmac-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1434,7 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1456,7 +1456,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1479,7 +1479,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1503,7 +1503,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1529,7 +1529,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1551,7 +1551,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1574,7 +1574,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1598,7 +1598,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, From 98eca72fa04a9bbf28dba95efaec5aa95588fe23 Mon Sep 17 00:00:00 2001 From: raveendra padasalagi Date: Fri, 1 Jul 2016 11:16:54 +0530 Subject: [PATCH 131/180] crypto: sha3 - Add HMAC-SHA3 test modes and test vectors This patch adds HMAC-SHA3 test modes in tcrypt module and related test vectors. Signed-off-by: Raveendra Padasalagi Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 16 ++ crypto/testmgr.c | 40 +++++ crypto/testmgr.h | 388 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 444 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 202cfa10076c5..ae22f05d5936c 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1313,6 +1313,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) ret += tcrypt_test("hmac(crc32)"); break; + case 111: + ret += tcrypt_test("hmac(sha3-224)"); + break; + + case 112: + ret += tcrypt_test("hmac(sha3-256)"); + break; + + case 113: + ret += tcrypt_test("hmac(sha3-384)"); + break; + + case 114: + ret += tcrypt_test("hmac(sha3-512)"); + break; + case 150: ret += tcrypt_test("ansi_cprng"); break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 38e23be315a0b..8ea0d3fcb580a 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3399,6 +3399,46 @@ static const struct alg_test_desc alg_test_descs[] = { .count = HMAC_SHA256_TEST_VECTORS } } + }, { + .alg = "hmac(sha3-224)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_224_tv_template, + .count = HMAC_SHA3_224_TEST_VECTORS + } + } + }, { + .alg = "hmac(sha3-256)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_256_tv_template, + .count = HMAC_SHA3_256_TEST_VECTORS + } + } + }, { + .alg = "hmac(sha3-384)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_384_tv_template, + .count = HMAC_SHA3_384_TEST_VECTORS + } + } + }, { + .alg = "hmac(sha3-512)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_512_tv_template, + .count = HMAC_SHA3_512_TEST_VECTORS + } + } }, { .alg = "hmac(sha384)", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 7358931b3082c..4ce2d866919d7 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3694,6 +3694,394 @@ static struct hash_testvec hmac_sha512_tv_template[] = { }, }; +#define HMAC_SHA3_224_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_224_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\x3b\x16\x54\x6b\xbc\x7b\xe2\x70" + "\x6a\x03\x1d\xca\xfd\x56\x37\x3d" + "\x98\x84\x36\x76\x41\xd8\xc5\x9a" + "\xf3\xc8\x60\xf7", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\x7f\xdb\x8d\xd8\x8b\xd2\xf6\x0d" + "\x1b\x79\x86\x34\xad\x38\x68\x11" + "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b" + "\xba\xce\x5e\x66", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\xb4\xa1\xf0\x4c\x00\x28\x7a\x9b" + "\x7f\x60\x75\xb3\x13\xd2\x79\xb8" + "\x33\xbc\x8f\x75\x12\x43\x52\xd0" + "\x5f\xb9\x99\x5f", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x05\xd8\xcd\x6d\x00\xfa\xea\x8d" + "\x1e\xb6\x8a\xde\x28\x73\x0b\xbd" + "\x3c\xba\xb6\x92\x9f\x0a\x08\x6b" + "\x29\xcd\x62\xa0", + }, +}; + +#define HMAC_SHA3_256_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_256_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\xba\x85\x19\x23\x10\xdf\xfa\x96" + "\xe2\xa3\xa4\x0e\x69\x77\x43\x51" + "\x14\x0b\xb7\x18\x5e\x12\x02\xcd" + "\xcc\x91\x75\x89\xf9\x5e\x16\xbb", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\xc7\xd4\x07\x2e\x78\x88\x77\xae" + "\x35\x96\xbb\xb0\xda\x73\xb8\x87" + "\xc9\x17\x1f\x93\x09\x5b\x29\x4a" + "\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\xed\x73\xa3\x74\xb9\x6c\x00\x52" + "\x35\xf9\x48\x03\x2f\x09\x67\x4a" + "\x58\xc0\xce\x55\x5c\xfc\x1f\x22" + "\x3b\x02\x35\x65\x60\x31\x2c\x3b", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x65\xc5\xb0\x6d\x4c\x3d\xe3\x2a" + "\x7a\xef\x87\x63\x26\x1e\x49\xad" + "\xb6\xe2\x29\x3e\xc8\xe7\xc6\x1e" + "\x8d\xe6\x17\x01\xfc\x63\xe1\x23", + }, +}; + +#define HMAC_SHA3_384_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_384_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\x68\xd2\xdc\xf7\xfd\x4d\xdd\x0a" + "\x22\x40\xc8\xa4\x37\x30\x5f\x61" + "\xfb\x73\x34\xcf\xb5\xd0\x22\x6e" + "\x1b\xc2\x7d\xc1\x0a\x2e\x72\x3a" + "\x20\xd3\x70\xb4\x77\x43\x13\x0e" + "\x26\xac\x7e\x3d\x53\x28\x86\xbd", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\xf1\x10\x1f\x8c\xbf\x97\x66\xfd" + "\x67\x64\xd2\xed\x61\x90\x3f\x21" + "\xca\x9b\x18\xf5\x7c\xf3\xe1\xa2" + "\x3c\xa1\x35\x08\xa9\x32\x43\xce" + "\x48\xc0\x45\xdc\x00\x7f\x26\xa2" + "\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\x0f\xc1\x95\x13\xbf\x6b\xd8\x78" + "\x03\x70\x16\x70\x6a\x0e\x57\xbc" + "\x52\x81\x39\x83\x6b\x9a\x42\xc3" + "\xd4\x19\xe4\x98\xe0\xe1\xfb\x96" + "\x16\xfd\x66\x91\x38\xd3\x3a\x11" + "\x05\xe0\x7c\x72\xb6\x95\x3b\xcc", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x02\x6f\xdf\x6b\x50\x74\x1e\x37" + "\x38\x99\xc9\xf7\xd5\x40\x6d\x4e" + "\xb0\x9f\xc6\x66\x56\x36\xfc\x1a" + "\x53\x00\x29\xdd\xf5\xcf\x3c\xa5" + "\xa9\x00\xed\xce\x01\xf5\xf6\x1e" + "\x2f\x40\x8c\xdf\x2f\xd3\xe7\xe8", + }, +}; + +#define HMAC_SHA3_512_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_512_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\xeb\x3f\xbd\x4b\x2e\xaa\xb8\xf5" + "\xc5\x04\xbd\x3a\x41\x46\x5a\xac" + "\xec\x15\x77\x0a\x7c\xab\xac\x53" + "\x1e\x48\x2f\x86\x0b\x5e\xc7\xba" + "\x47\xcc\xb2\xc6\xf2\xaf\xce\x8f" + "\x88\xd2\x2b\x6d\xc6\x13\x80\xf2" + "\x3a\x66\x8f\xd3\x88\x8b\xb8\x05" + "\x37\xc0\xa0\xb8\x64\x07\x68\x9e", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\x5a\x4b\xfe\xab\x61\x66\x42\x7c" + "\x7a\x36\x47\xb7\x47\x29\x2b\x83" + "\x84\x53\x7c\xdb\x89\xaf\xb3\xbf" + "\x56\x65\xe4\xc5\xe7\x09\x35\x0b" + "\x28\x7b\xae\xc9\x21\xfd\x7c\xa0" + "\xee\x7a\x0c\x31\xd0\x22\xa9\x5e" + "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83" + "\x96\x02\x75\xbe\xb4\xe6\x20\x24", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\x00\xf7\x51\xa9\xe5\x06\x95\xb0" + "\x90\xed\x69\x11\xa4\xb6\x55\x24" + "\x95\x1c\xdc\x15\xa7\x3a\x5d\x58" + "\xbb\x55\x21\x5e\xa2\xcd\x83\x9a" + "\xc7\x9d\x2b\x44\xa3\x9b\xaf\xab" + "\x27\xe8\x3f\xde\x9e\x11\xf6\x34" + "\x0b\x11\xd9\x91\xb1\xb9\x1b\xf2" + "\xee\xe7\xfc\x87\x24\x26\xc3\xa4", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x38\xa4\x56\xa0\x04\xbd\x10\xd3" + "\x2c\x9a\xb8\x33\x66\x84\x11\x28" + "\x62\xc3\xdb\x61\xad\xcc\xa3\x18" + "\x29\x35\x5e\xaf\x46\xfd\x5c\x73" + "\xd0\x6a\x1f\x0d\x13\xfe\xc9\xa6" + "\x52\xfb\x38\x11\xb5\x77\xb1\xb1" + "\xd1\xb9\x78\x9f\x97\xae\x5b\x83" + "\xc6\xf4\x4d\xfc\xf1\xd6\x7e\xba", + }, +}; + /* * Poly1305 test vectors from RFC7539 A.3. */ From 27710b8ea3defcbd7d340dbd0423d911b4eb7c4f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 3 Jul 2016 10:46:11 +0800 Subject: [PATCH 132/180] crypto: rsa-pkcs1pad - Fix regression from leading zeros As the software RSA implementation now produces fixed-length output, we need to eliminate leading zeros in the calling code instead. This patch does just that for pkcs1pad signature verification. Fixes: 9b45b7bba3d2 ("crypto: rsa - Generate fixed-length output") Reported-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 8ccfdd7c926eb..880d3db57a252 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -456,49 +456,55 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) struct akcipher_instance *inst = akcipher_alg_instance(tfm); struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); const struct rsa_asn1_template *digest_info = ictx->digest_info; + unsigned int dst_len; unsigned int pos; - - if (err == -EOVERFLOW) - /* Decrypted value had no leading 0 byte */ - err = -EINVAL; + u8 *out_buf; if (err) goto done; - if (req_ctx->child_req.dst_len != ctx->key_size - 1) { - err = -EINVAL; + err = -EINVAL; + dst_len = req_ctx->child_req.dst_len; + if (dst_len < ctx->key_size - 1) goto done; + + out_buf = req_ctx->out_buf; + if (dst_len == ctx->key_size) { + if (out_buf[0] != 0x00) + /* Decrypted value had no leading 0 byte */ + goto done; + + dst_len--; + out_buf++; } err = -EBADMSG; - if (req_ctx->out_buf[0] != 0x01) + if (out_buf[0] != 0x01) goto done; - for (pos = 1; pos < req_ctx->child_req.dst_len; pos++) - if (req_ctx->out_buf[pos] != 0xff) + for (pos = 1; pos < dst_len; pos++) + if (out_buf[pos] != 0xff) break; - if (pos < 9 || pos == req_ctx->child_req.dst_len || - req_ctx->out_buf[pos] != 0x00) + if (pos < 9 || pos == dst_len || out_buf[pos] != 0x00) goto done; pos++; - if (memcmp(req_ctx->out_buf + pos, digest_info->data, - digest_info->size)) + if (memcmp(out_buf + pos, digest_info->data, digest_info->size)) goto done; pos += digest_info->size; err = 0; - if (req->dst_len < req_ctx->child_req.dst_len - pos) + if (req->dst_len < dst_len - pos) err = -EOVERFLOW; - req->dst_len = req_ctx->child_req.dst_len - pos; + req->dst_len = dst_len - pos; if (!err) sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, req->dst_len), - req_ctx->out_buf + pos, req->dst_len); + out_buf + pos, req->dst_len); done: kzfree(req_ctx->out_buf); From 151f25112ff7befc134ed3fc58b0ff8792b3169e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 1 Jul 2016 08:19:44 +1000 Subject: [PATCH 133/180] powerpc: define FUNC_START/FUNC_END gcc provides FUNC_START/FUNC_END macros to help with creating assembly functions. Mirror these in the kernel so we can more easily share code between userspace and the kernel. FUNC_END is just a stub since we don't currently annotate the end of kernel functions. It might make sense to do a wholesale search and replace, but for now just create a couple of defines. Signed-off-by: Anton Blanchard Signed-off-by: Herbert Xu --- arch/powerpc/include/asm/ppc_asm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 2b31632376a5b..051af612a7e14 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -286,6 +286,9 @@ GLUE(.,name): #endif +#define FUNC_START(name) _GLOBAL(name) +#define FUNC_END(name) + /* * LOAD_REG_IMMEDIATE(rn, expr) * Loads the value of the constant expression 'expr' into register 'rn' From 6dd7a82cc54ebd2936763befd3dcd4beb727a704 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 1 Jul 2016 08:19:45 +1000 Subject: [PATCH 134/180] crypto: powerpc - Add POWER8 optimised crc32c Use the vector polynomial multiply-sum instructions in POWER8 to speed up crc32c. This is just over 41x faster than the slice-by-8 method that it replaces. Measurements on a 4.1 GHz POWER8 show it sustaining 52 GiB/sec. A simple btrfs write performance test: dd if=/dev/zero of=/mnt/tmpfile bs=1M count=4096 sync is over 3.7x faster. Signed-off-by: Anton Blanchard Signed-off-by: Herbert Xu --- arch/powerpc/crypto/Makefile | 2 + arch/powerpc/crypto/crc32c-vpmsum_asm.S | 1553 ++++++++++++++++++++++ arch/powerpc/crypto/crc32c-vpmsum_glue.c | 167 +++ arch/powerpc/include/asm/ppc-opcode.h | 12 + crypto/Kconfig | 11 + 5 files changed, 1745 insertions(+) create mode 100644 arch/powerpc/crypto/crc32c-vpmsum_asm.S create mode 100644 arch/powerpc/crypto/crc32c-vpmsum_glue.c diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 9c221b69c1818..7998c177f0a2d 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -9,9 +9,11 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o +obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o sha1-powerpc-y := sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o +crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/crypto/crc32c-vpmsum_asm.S new file mode 100644 index 0000000000000..dc640b212299c --- /dev/null +++ b/arch/powerpc/crypto/crc32c-vpmsum_asm.S @@ -0,0 +1,1553 @@ +/* + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. + * + * http://en.wikipedia.org/wiki/Barrett_reduction + * + * Copyright (C) 2015 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +#define MAX_SIZE 32768 +.constants: + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + .octa 0x00000000b6ca9e20000000009c37c408 + + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + .octa 0x00000000350249a800000001b51df26c + + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + .octa 0x00000001862dac54000000000724b9d0 + + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + .octa 0x00000001d87fb48c00000001c00532fe + + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + .octa 0x00000001f39b699e00000000f05a9362 + + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + .octa 0x0000000101da11b400000001e1007970 + + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + .octa 0x00000001cab571e000000000a57366ee + + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + .octa 0x00000000c7020cfe0000000192011284 + + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + .octa 0x00000000cdaed1ae0000000162716d9a + + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + .octa 0x00000001e804effc00000000cd97ecde + + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + .octa 0x0000000077c3ea3a0000000058812bc0 + + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + .octa 0x0000000068df31b40000000088b8c12e + + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + .octa 0x00000000b059b6c200000001230b234c + + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + .octa 0x0000000145fb8ed800000001120b416e + + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + .octa 0x00000000cbc0916800000001974aecb0 + + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + .octa 0x000000005ceeedc2000000008ee3f226 + + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + .octa 0x0000000047d74e8600000001089aba9a + + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + .octa 0x00000001407e9e220000000065113872 + + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + .octa 0x00000001da967bda000000005c07ec10 + + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + .octa 0x000000006c8983680000000187590924 + + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + .octa 0x00000000f2d14c9800000000e35da7c6 + + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + .octa 0x00000001993c6ad4000000000415855a + + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + .octa 0x000000014683d1ac0000000073617758 + + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + .octa 0x00000001a7c93e6c0000000176021d28 + + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + .octa 0x000000010211e90a00000001c358fd0a + + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + .octa 0x000000001119403e00000001ff7a2c18 + + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + .octa 0x000000001c3261aa00000000f2d9f7e4 + + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + .octa 0x000000014e37a634000000016cf1f9c8 + + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + .octa 0x0000000073786c0c000000010af9279a + + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + .octa 0x000000011dc037f80000000004f101e8 + + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + .octa 0x0000000031433dfc0000000070bcf184 + + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + .octa 0x000000009cde8348000000000a8de642 + + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + .octa 0x0000000038d3c2a60000000062ea130c + + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + .octa 0x000000011b25f26000000001eb31cbb2 + + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + .octa 0x000000001629e6f00000000170783448 + + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + .octa 0x0000000160838b4c00000001a684b4c6 + + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + .octa 0x000000007a44011c00000000253ca5b4 + + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + .octa 0x00000000226f417a0000000057b4b1e2 + + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + .octa 0x0000000045eb2eb400000000b6bd084c + + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + .octa 0x000000014459d70c0000000123c2d592 + + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + .octa 0x00000001d406ed8200000000159dafce + + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + .octa 0x0000000160c8e1a80000000127e1a64e + + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + .octa 0x0000000027ba80980000000056860754 + + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + .octa 0x000000006d92d01800000001e661aae8 + + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + .octa 0x000000012ed7e3f200000000f82c6166 + + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + .octa 0x000000002dc8778800000000c4f9c7ae + + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + .octa 0x0000000018240bb80000000074203d20 + + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + .octa 0x000000001ad381580000000198173052 + + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + .octa 0x00000001396b78f200000001ce8aba54 + + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + .octa 0x000000011a68133400000001850d5d94 + + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + .octa 0x000000012104732e00000001d609239c + + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + .octa 0x00000000a140d90c000000001595f048 + + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + .octa 0x00000001b7215eda0000000042ccee08 + + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + .octa 0x00000001aaf1df3c000000010a389d74 + + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + .octa 0x0000000029d15b8a000000012a840da6 + + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + .octa 0x00000000f1a96922000000001d181c0c + + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + .octa 0x00000001ac80d03c0000000068b7d1f6 + + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + .octa 0x000000000f11d56a000000005b0f14fc + + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + .octa 0x00000001f1c022a20000000179e9e730 + + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + .octa 0x0000000173d00ae200000001ce1368d6 + + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + .octa 0x00000001d4ffe4ac0000000112c3a84c + + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + .octa 0x000000016edc5ae400000000de940fee + + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + .octa 0x00000001f1a0214000000000fe896b7e + + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + .octa 0x00000000ca0b28a000000001f797431c + + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + .octa 0x00000001928e30a20000000053e989ba + + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + .octa 0x0000000097b1b002000000003920cd16 + + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + .octa 0x00000000b15bf90600000001e6f579b8 + + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + .octa 0x00000000411c5d52000000007493cb0a + + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + .octa 0x00000001c36f330000000001bdd376d8 + + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + .octa 0x00000001119227e0000000016badfee6 + + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + .octa 0x00000000114d47020000000071de5c58 + + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + .octa 0x00000000458b5b9800000000453f317c + + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + .octa 0x000000012e31fb8e0000000121675cce + + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + .octa 0x000000005cf619d800000001f409ee92 + + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + .octa 0x0000000063f4d8b200000000f36b9c88 + + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + .octa 0x000000004138dc8a0000000036b398f4 + + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + .octa 0x00000001d29ee8e000000001748f9adc + + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + .octa 0x000000006a08ace800000001be94ec00 + + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + .octa 0x0000000127d4201000000000b74370d6 + + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + .octa 0x0000000019d76b6200000001174d0b98 + + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + .octa 0x00000001b1471f6e00000000befc06a4 + + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + .octa 0x00000001f64c19cc00000001ae125288 + + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + .octa 0x00000000003c0ea00000000095c19b34 + + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + .octa 0x000000014d73abf600000001a78496f2 + + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + .octa 0x00000001620eb84400000001ac5390a0 + + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + .octa 0x0000000147655048000000002a80ed6e + + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + .octa 0x0000000067b5077e00000001fa9b0128 + + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + .octa 0x0000000010ffe20600000001ea94929e + + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + .octa 0x000000000fee8f1e0000000125f4305c + + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + .octa 0x00000001da26fbae00000001471e2002 + + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + .octa 0x00000001b3a8bd880000000132d2253a + + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + .octa 0x00000000e8f3898e00000000f26b3592 + + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + .octa 0x00000000b0d0d28c00000000bc8b67b0 + + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + .octa 0x0000000030f2a798000000013a826ef2 + + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + .octa 0x000000000fba10020000000081482c84 + + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + .octa 0x00000000bdb9bd7200000000e77307c2 + + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + .octa 0x0000000075d3bf5a00000000d4a07ec8 + + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + .octa 0x00000000ef1f98a00000000017102100 + + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + .octa 0x00000000689c760200000000db406486 + + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + .octa 0x000000016d5fa5fe0000000192db7f88 + + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + .octa 0x00000001d0d2b9ca000000018bf67b1e + + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + .octa 0x0000000041e7b470000000007c09163e + + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + .octa 0x00000001cbb6495e000000000adac060 + + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + .octa 0x000000010052a0b000000000bd8316ae + + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + .octa 0x00000001d8effb5c000000019f09ab54 + + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + .octa 0x00000001d969853c0000000125155542 + + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + .octa 0x00000000523ccce2000000018fdb5882 + + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + .octa 0x000000001e2436bc00000000e794b3f4 + + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + .octa 0x00000000ddd1c3a2000000016f9bb022 + + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + .octa 0x0000000019fcfe3800000000290c9978 + + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + .octa 0x00000001ce95db640000000083c0f350 + + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + .octa 0x00000000af5828060000000173ea6628 + + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + .octa 0x00000001006388f600000001c8b4e00a + + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + .octa 0x0000000179eca00a00000000de95d6aa + + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + .octa 0x0000000122410a6a000000010b7f7248 + + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + .octa 0x000000004288e87c00000001326e3a06 + + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + .octa 0x000000016c5490da00000000bb62c2e6 + + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + .octa 0x00000000d1c71f6e0000000156a4b2c2 + + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + .octa 0x00000001b4ce08a6000000011dfe763a + + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + .octa 0x00000001466ba60c000000007bcca8e2 + + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + .octa 0x00000001f6c488a40000000186118faa + + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + .octa 0x000000013bfb06820000000111a65a88 + + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + .octa 0x00000000690e9e54000000003565e1c4 + + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + .octa 0x00000000281346b6000000012ed02a82 + + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + .octa 0x000000015646402400000000c486ecfc + + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + .octa 0x000000016063a8dc0000000001b951b2 + + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + .octa 0x0000000116a663620000000048143916 + + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + .octa 0x000000017e8aa4d200000001dc2ae124 + + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + .octa 0x00000001728eb10c00000001416c58d6 + + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + .octa 0x00000001b08fd7fa00000000a479744a + + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + .octa 0x00000001092a16e80000000096ca3a26 + + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + .octa 0x00000000a505637c00000000ff223d4e + + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + .octa 0x00000000d94869b2000000010e84da42 + + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + .octa 0x00000001c8b203ae00000001b61ba3d0 + + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + .octa 0x000000005704aea000000000680f2de8 + + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + .octa 0x000000012e295fa2000000008772a9a8 + + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + .octa 0x000000011d0908bc0000000155f295bc + + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + .octa 0x0000000193ed97ea00000000595f9282 + + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + .octa 0x000000013a0f1c520000000164b1c25a + + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + .octa 0x000000010c2c40c000000000fbd67c50 + + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + .octa 0x00000000ff6fac3e0000000096076268 + + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + .octa 0x000000017b3609c000000001d288e4cc + + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + .octa 0x0000000088c8c92200000001eaac1bdc + + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + .octa 0x00000001751baae600000001f1ea39e2 + + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + .octa 0x000000010795297200000001eb6506fc + + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + .octa 0x0000000162b00abe000000010f806ffe + + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + .octa 0x000000000d7b404c000000010408481e + + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + .octa 0x00000000763b13d40000000188260534 + + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + .octa 0x00000000f6dc22d80000000058fc73e0 + + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + .octa 0x000000007daae06000000000391c59b8 + + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + .octa 0x000000013359ab7c000000018b638400 + + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + .octa 0x000000008add438a000000011738f5c4 + + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + .octa 0x00000001edbefdea000000008cf7c6da + + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + .octa 0x000000004104e0f800000001ef97fb16 + + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + .octa 0x00000000b48a82220000000102130e20 + + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + .octa 0x00000001bcb4684400000000db968898 + + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + .octa 0x000000013293ce0a00000000b5047b5e + + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + .octa 0x00000001710d0844000000010b90fdb2 + + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + .octa 0x0000000117907f6e000000004834a32e + + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + .octa 0x0000000087ddf93e0000000059c8f2b0 + + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + .octa 0x000000005970e9b00000000122cec508 + + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + .octa 0x0000000185b2b7d0000000000a330cda + + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + .octa 0x00000001dcee0efc000000014a47148c + + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + .octa 0x0000000030da27220000000042c61cb8 + + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + .octa 0x000000012f925a180000000012fe6960 + + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + .octa 0x00000000dd2e357c00000000dbda2c20 + + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + .octa 0x00000000071c80de000000011122410c + + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + .octa 0x000000011513140a00000000977b2070 + + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + .octa 0x00000001df876e8e000000014050438e + + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + .octa 0x000000015f81d6ce0000000147c840e8 + + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + .octa 0x000000019dd94dbe00000001cc7c88ce + + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + .octa 0x00000001373d206e00000001476b35a4 + + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + .octa 0x00000000668ccade000000013d52d508 + + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + .octa 0x00000001b192d268000000008e4be32e + + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + .octa 0x00000000e30f3a7800000000024120fe + + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + .octa 0x000000010ef1f7bc00000000ddecddb4 + + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + .octa 0x00000001f5ac738000000000d4d403bc + + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + .octa 0x000000011822ea7000000001734b89aa + + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + .octa 0x00000000c3a33848000000010e7a58d6 + + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + .octa 0x00000001bd151c2400000001f9f04e9c + + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + .octa 0x0000000056002d7600000000b692225e + + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + .octa 0x000000014657c4f4000000019b8d3f3e + + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + .octa 0x0000000113742d7c00000001a874f11e + + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + .octa 0x000000019c5920ba000000010d5a4254 + + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + .octa 0x000000005216d2d600000000bbb2f5d6 + + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + .octa 0x0000000136f5ad8a0000000179cc0e36 + + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + .octa 0x000000018b07beb600000001dca1da4a + + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + .octa 0x00000000db1e93b000000000feb1a192 + + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + .octa 0x000000000b96fa3a00000000d1eeedd6 + + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + .octa 0x00000001d9968af0000000008fad9bb4 + + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + .octa 0x000000000e4a77a200000001884938e4 + + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + .octa 0x00000000508c2ac800000001bc2e9bc0 + + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + .octa 0x0000000021572a8000000001f9658a68 + + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + .octa 0x00000001b859daf2000000001b9224fc + + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + .octa 0x000000016f7884740000000055b2fb84 + + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + .octa 0x00000001b438810e000000018b090348 + + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + .octa 0x0000000095ddc6f2000000011ccbd5ea + + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + .octa 0x00000001d977c20c0000000007ae47f8 + + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + .octa 0x00000000ebedb99a0000000172acbec0 + + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + .octa 0x00000001df9e9e9200000001c6e3ff20 + + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + .octa 0x00000001a4a3f95200000000e1b38744 + + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + .octa 0x00000000e2f5122000000000791585b2 + + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + .octa 0x000000004aa01f3e00000000ac53b894 + + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + .octa 0x00000000b3e90a5800000001ed5f2cf4 + + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + .octa 0x000000000c9ca2aa00000001df48b2e0 + + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + .octa 0x000000015168231600000000049c1c62 + + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + .octa 0x0000000036fce78c000000017c460c12 + + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + .octa 0x000000009037dc10000000015be4da7e + + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + .octa 0x00000000d3298582000000010f38f668 + + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + .octa 0x00000001b42e8ad60000000039f40a00 + + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + .octa 0x00000000142a983800000000bd4c10c4 + + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + .octa 0x0000000109c7f1900000000042db1d98 + + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + .octa 0x0000000056ff931000000001c905bae6 + + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + .octa 0x00000001594513aa00000000069d40ea + + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + .octa 0x00000001e3b5b1e8000000008e4fbad0 + + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + .octa 0x000000011dd5fc080000000047bedd46 + + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + .octa 0x00000001675f0cc20000000026396bf8 + + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + .octa 0x00000000d1c8dd4400000000379beb92 + + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + .octa 0x0000000115ebd3d8000000000abae54a + + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + .octa 0x00000001ecbd0dac0000000007e6a128 + + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + .octa 0x00000000cdf67af2000000000ade29d2 + + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + .octa 0x000000004c01ff4c00000000f974c45c + + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + .octa 0x00000000f2d8657e00000000e77ac60a + + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + .octa 0x000000006bae74c40000000145895816 + + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + .octa 0x0000000152af8aa00000000038e362be + + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + .octa 0x0000000004663802000000007f991a64 + + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + .octa 0x00000001ab2f5afc00000000fa366d3a + + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + .octa 0x0000000074a4ebd400000001a2bb34f0 + + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + .octa 0x00000001d7ab3a4c0000000028a9981e + + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + .octa 0x00000001a8da60c600000001dbc672be + + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + .octa 0x000000013cf6382000000000b04d77f6 + + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + .octa 0x00000000bec12e1e0000000124400d96 + + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + .octa 0x00000001c6368010000000014ca4b414 + + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + .octa 0x00000001e6e78758000000012fe2c938 + + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + .octa 0x000000008d7f2b3c00000001faed01e6 + + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + .octa 0x000000016b4a156e000000007e80ecfe + + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + .octa 0x00000001c63cfeb60000000098daee94 + + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + .octa 0x000000015f902670000000010a04edea + + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + .octa 0x00000001cd5de11e00000001c00b4524 + + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + .octa 0x000000001acaec540000000170296550 + + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + .octa 0x000000002bd0ca780000000181afaa48 + + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + .octa 0x0000000032d63d5c0000000185a31ffa + + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + .octa 0x000000001c6d4e4c000000002469f608 + + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + .octa 0x0000000106a60b92000000006980102a + + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + .octa 0x00000000d3855e120000000111ea9ca8 + + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + .octa 0x00000000e312563600000001bd1d29ce + + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + .octa 0x000000009e8f7ea400000001b34b9580 + + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + .octa 0x00000001c82e562c000000003076054e + + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + .octa 0x00000000ca9f09ce000000012a608ea4 + + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + .octa 0x00000000c63764e600000000784d05fe + + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + .octa 0x0000000168d2e49e000000016ef0d82a + + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + .octa 0x00000000e986c1480000000075bda454 + + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + .octa 0x00000000cfb65894000000003dc0a1c4 + + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + .octa 0x0000000111cadee400000000e9a5d8be + + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + .octa 0x0000000171fb63ce00000001609bc4b4 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */ + .octa 0x7fec2963e5bf80485cf015c388e56f72 + + /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */ + .octa 0x38e888d4844752a9963a18920246e2e6 + + /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */ + .octa 0x42316c00730206ad419a441956993a31 + + /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */ + .octa 0x543d5c543e65ddf9924752ba2b830011 + + /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */ + .octa 0x78e87aaf56767c9255bd7f9518e4a304 + + /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */ + .octa 0x8f68fcec1903da7f6d76739fe0553f1e + + /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */ + .octa 0x3f4840246791d588c133722b1fe0b5c3 + + /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */ + .octa 0x34c96751b04de25a64b67ee0e55ef1f3 + + /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */ + .octa 0x156c8e180b4a395b069db049b8fdb1e7 + + /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ + .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e + + /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ + .octa 0x041d37768cd75659817cdc5119b29a35 + + /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ + .octa 0x3a0777818cfaa9651ce9d94b36c41f1c + + /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ + .octa 0x0e148e8252377a554f256efcb82be955 + + /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ + .octa 0x9c25531d19e65ddeec1631edb2dea967 + + /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ + .octa 0x790606ff9957c0a65d27e147510ac59a + + /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ + .octa 0x82f63b786ea2d55ca66805eb18b8ea18 + + +.barrett_constants: + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ + /* 33 bit reflected Barrett constant n */ + .octa 0x00000000000000000000000105ec76f1 + + .text + +#if defined(__BIG_ENDIAN__) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int __crc32c_vpmsum(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(__crc32c_vpmsum) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, R3) + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ + +#ifdef BYTESWAP_DATA + addis r3,r2,.byteswap_constant@toc@ha + addi r3,r3,.byteswap_constant@toc@l + + lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + addis r3,r2,.constants@toc@ha + addi r3,r3,.constants@toc@l + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. + */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. + */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. + */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + addis r3,r2,.barrett_constants@toc@ha + addi r3,r3,.barrett_constants@toc@l + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 + + vand v0,v0,mask_64bit + + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. + */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ + + /* Get it into r3 */ + MFVRD(R3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + addis r3,r2,.short_constants@toc@ha + addi r3,r3,.short_constants@toc@l + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? */ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(__crc32_vpmsum) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c new file mode 100644 index 0000000000000..bfe3d37a24ef3 --- /dev/null +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -0,0 +1,167 @@ +#include +#include +#include +#include +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 512 + +u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len); + +static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt()) + return __crc32c_le(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = __crc32c_le(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + pagefault_disable(); + enable_kernel_altivec(); + crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + pagefault_enable(); + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = __crc32c_le(crc, p, tail); + } + + return crc; +} + +static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32c_vpmsum_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32c_vpmsum(*crcp, data, len); + + return 0; +} + +static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = ~cpu_to_le32(crc32c_vpmsum(*crcp, data, len)); + + return 0; +} + +static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = ~cpu_to_le32p(crcp); + + return 0; +} + +static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = crc32c_vpmsum_setkey, + .init = crc32c_vpmsum_init, + .update = crc32c_vpmsum_update, + .final = crc32c_vpmsum_final, + .finup = crc32c_vpmsum_finup, + .digest = crc32c_vpmsum_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-vpmsum", + .cra_priority = 200, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32c_vpmsum_cra_init, + } +}; + +static int __init crc32c_vpmsum_mod_init(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit crc32c_vpmsum_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32c_vpmsum_mod_init); +module_exit(crc32c_vpmsum_mod_fini); + +MODULE_AUTHOR("Anton Blanchard "); +MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-vpmsum"); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1d035c1cc8898..49cd8760aa7cc 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -174,6 +174,8 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MFVSRD 0x7c000066 +#define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SLBFEE 0x7c0007a7 #define PPC_INST_STRING 0x7c00042a @@ -188,6 +190,8 @@ #define PPC_INST_WAIT 0x7c00007c #define PPC_INST_TLBIVAX 0x7c000624 #define PPC_INST_TLBSRX_DOT 0x7c0006a5 +#define PPC_INST_VPMSUMW 0x10000488 +#define PPC_INST_VPMSUMD 0x100004c8 #define PPC_INST_XXLOR 0xf0000510 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 @@ -359,6 +363,14 @@ VSX_XX1((s), a, b)) #define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \ VSX_XX1((s), a, b)) +#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \ + VSX_XX1((t)+32, a, R0)) +#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \ + VSX_XX1((t)+32, a, R0)) +#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \ + VSX_XX3((t), a, b)) +#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \ + VSX_XX3((t), a, b)) #define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ VSX_XX3((t), a, b)) #define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \ diff --git a/crypto/Kconfig b/crypto/Kconfig index 62fcbb9237533..a9377bef25e3e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -437,6 +437,17 @@ config CRYPTO_CRC32C_INTEL gain performance compared with software implementation. Module will be crc32c-intel. +config CRYPT_CRC32C_VPMSUM + tristate "CRC32c CRC algorithm (powerpc64)" + depends on PPC64 + select CRYPTO_HASH + select CRC32 + help + CRC32c algorithm implemented using vector polynomial multiply-sum + (vpmsum) instructions, introduced in POWER8. Enable on POWER8 + and newer processors for improved performance. + + config CRYPTO_CRC32C_SPARC64 tristate "CRC32c CRC algorithm (SPARC64)" depends on SPARC64 From 6889621fd2317f52fd2c5ef1178128156f39fa94 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 10:49:28 +0100 Subject: [PATCH 135/180] crypto: qat - Switch to new rsa_helper functions Drop all asn1 related code and use the new rsa_helper functions rsa_parse_[pub|priv]_key for parsing the key Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/Kconfig | 2 +- drivers/crypto/qat/qat_common/Makefile | 10 ---- drivers/crypto/qat/qat_common/qat_asym_algs.c | 49 ++++++++----------- .../crypto/qat/qat_common/qat_rsaprivkey.asn1 | 11 ----- .../crypto/qat/qat_common/qat_rsapubkey.asn1 | 4 -- 5 files changed, 21 insertions(+), 55 deletions(-) delete mode 100644 drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 delete mode 100644 drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 85b44e577684b..571d04dda415a 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -5,11 +5,11 @@ config CRYPTO_DEV_QAT select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER select CRYPTO_HMAC + select CRYPTO_RSA select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select FW_LOADER - select ASN1 config CRYPTO_DEV_QAT_DH895xCC tristate "Support for Intel(R) DH895xCC" diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 6d74b91f21528..92fb6ffdc0622 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -1,11 +1,3 @@ -$(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \ - $(obj)/qat_rsapubkey-asn1.h -$(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \ - $(obj)/qat_rsaprivkey-asn1.h - -clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h -clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h - obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o intel_qat-objs := adf_cfg.o \ adf_isr.o \ @@ -19,8 +11,6 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ qat_crypto.o \ qat_algs.o \ - qat_rsapubkey-asn1.o \ - qat_rsaprivkey-asn1.o \ qat_asym_algs.o \ qat_uclo.o \ qat_hal.o diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 05f49d4f94b27..04b0ef8cfaa19 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -52,8 +52,6 @@ #include #include #include -#include "qat_rsapubkey-asn1.h" -#include "qat_rsaprivkey-asn1.h" #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" #include "adf_transport.h" @@ -502,10 +500,8 @@ static int qat_rsa_dec(struct akcipher_request *req) return ret; } -int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -518,11 +514,6 @@ int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, ctx->key_sz = vlen; ret = -EINVAL; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (ctx->key_sz != 256 && ctx->key_sz != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } /* invalid key size provided */ if (!qat_rsa_enc_fn_id(ctx->key_sz)) goto err; @@ -540,10 +531,8 @@ int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, return ret; } -int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -559,18 +548,15 @@ int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, } ctx->e = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_e, GFP_KERNEL); - if (!ctx->e) { - ctx->e = NULL; + if (!ctx->e) return -ENOMEM; - } + memcpy(ctx->e + (ctx->key_sz - vlen), ptr, vlen); return 0; } -int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -585,12 +571,6 @@ int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, if (!ctx->key_sz || !vlen || vlen > ctx->key_sz) goto err; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (vlen != 256 && vlen != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } - ret = -ENOMEM; ctx->d = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_d, GFP_KERNEL); if (!ctx->d) @@ -608,6 +588,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, { struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; int ret; /* Free the old key if any */ @@ -625,13 +606,23 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, ctx->d = NULL; if (private) - ret = asn1_ber_decoder(&qat_rsaprivkey_decoder, ctx, key, - keylen); + ret = rsa_parse_priv_key(&rsa_key, key, keylen); else - ret = asn1_ber_decoder(&qat_rsapubkey_decoder, ctx, key, - keylen); + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + if (ret < 0) + goto free; + + ret = qat_rsa_set_n(ctx, rsa_key.n, rsa_key.n_sz); if (ret < 0) goto free; + ret = qat_rsa_set_e(ctx, rsa_key.e, rsa_key.e_sz); + if (ret < 0) + goto free; + if (private) { + ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); + if (ret < 0) + goto free; + } if (!ctx->n || !ctx->e) { /* invalid key provided */ diff --git a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 deleted file mode 100644 index f0066adb79b83..0000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 +++ /dev/null @@ -1,11 +0,0 @@ -RsaPrivKey ::= SEQUENCE { - version INTEGER, - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }), - d INTEGER ({ qat_rsa_get_d }), - prime1 INTEGER, - prime2 INTEGER, - exponent1 INTEGER, - exponent2 INTEGER, - coefficient INTEGER -} diff --git a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 deleted file mode 100644 index bd667b31a21a2..0000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 +++ /dev/null @@ -1,4 +0,0 @@ -RsaPubKey ::= SEQUENCE { - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }) -} From 57763f5ec7488d5864e4d6ec9d4197b8f52214bd Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 10:52:34 +0100 Subject: [PATCH 136/180] crypto: testmgr - Set err before proceeding Report correct error in case of failure Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/testmgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 8ea0d3fcb580a..769cc2a88040c 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1941,6 +1941,7 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, if (err) goto free_req; + err = -ENOMEM; out_len_max = crypto_akcipher_maxsize(tfm); outbuf_enc = kzalloc(out_len_max, GFP_KERNEL); if (!outbuf_enc) From 8c419778ab57e497b5de1352aa39dbe2efb3ed54 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 4 Jul 2016 13:12:08 +0300 Subject: [PATCH 137/180] crypto: caam - add support for RSA algorithm Add RSA support to caam driver. Initial author is Yashpal Dutta . Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 12 + drivers/crypto/caam/Makefile | 2 + drivers/crypto/caam/caampkc.c | 607 ++++++++++++++++++++++++++++++ drivers/crypto/caam/caampkc.h | 70 ++++ drivers/crypto/caam/compat.h | 3 + drivers/crypto/caam/desc.h | 2 + drivers/crypto/caam/desc_constr.h | 7 + drivers/crypto/caam/pdb.h | 51 ++- drivers/crypto/caam/pkc_desc.c | 36 ++ 9 files changed, 789 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/caam/caampkc.c create mode 100644 drivers/crypto/caam/caampkc.h create mode 100644 drivers/crypto/caam/pkc_desc.c diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index ff54c42e6e510..64bf3024b6808 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -99,6 +99,18 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API To compile this as a module, choose M here: the module will be called caamhash. +config CRYPTO_DEV_FSL_CAAM_PKC_API + tristate "Register public key cryptography implementations with Crypto API" + depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + default y + select CRYPTO_RSA + help + Selecting this will allow SEC Public key support for RSA. + Supported cryptographic primitives: encryption, decryption, + signature and verification. + To compile this as a module, choose M here: the module + will be called caam_pkc. + config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 3904700ef1102..08bf5515ae8a0 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -10,6 +10,8 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o caam-objs := ctrl.o caam_jr-objs := jr.o key_gen.o error.o +caam_pkc-y := caampkc.o pkc_desc.o diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c new file mode 100644 index 0000000000000..851015e652b81 --- /dev/null +++ b/drivers/crypto/caam/caampkc.c @@ -0,0 +1,607 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "compat.h" +#include "regs.h" +#include "intern.h" +#include "jr.h" +#include "error.h" +#include "desc_constr.h" +#include "sg_sw_sec4.h" +#include "caampkc.h" + +#define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb)) +#define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f1_pdb)) + +static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + dma_unmap_sg(dev, req->dst, edesc->dst_nents, DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + + if (edesc->sec4_sg_bytes) + dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, + DMA_TO_DEVICE); +} + +static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->e_dma, key->e_sz, DMA_TO_DEVICE); +} + +static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); +} + +/* RSA Job Completion handler */ +static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_pub_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f1_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, + size_t desclen) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = ctx->dev; + struct rsa_edesc *edesc; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + int sgc; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int src_nents, dst_nents; + + src_nents = sg_nents_for_len(req->src, req->src_len); + dst_nents = sg_nents_for_len(req->dst, req->dst_len); + + if (src_nents > 1) + sec4_sg_len = src_nents; + if (dst_nents > 1) + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc, hw desc commands and link tables */ + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, + GFP_DMA | flags); + if (!edesc) + return ERR_PTR(-ENOMEM); + + sgc = dma_map_sg(dev, req->src, src_nents, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map source\n"); + goto src_fail; + } + + sgc = dma_map_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map destination\n"); + goto dst_fail; + } + + edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen; + + sec4_sg_index = 0; + if (src_nents > 1) { + sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); + sec4_sg_index += src_nents; + } + if (dst_nents > 1) + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + + /* Save nents for later use in Job Descriptor */ + edesc->src_nents = src_nents; + edesc->dst_nents = dst_nents; + + if (!sec4_sg_bytes) + return edesc; + + edesc->sec4_sg_dma = dma_map_single(dev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(dev, edesc->sec4_sg_dma)) { + dev_err(dev, "unable to map S/G table\n"); + goto sec4_sg_fail; + } + + edesc->sec4_sg_bytes = sec4_sg_bytes; + + return edesc; + +sec4_sg_fail: + dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); +dst_fail: + dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE); +src_fail: + kfree(edesc); + return ERR_PTR(-ENOMEM); +} + +static int set_rsa_pub_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map RSA modulus memory\n"); + return -ENOMEM; + } + + pdb->e_dma = dma_map_single(dev, key->e, key->e_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->e_dma)) { + dev_err(dev, "Unable to map RSA public exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->f_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->g_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->e_sz << RSA_PDB_E_SHIFT) | key->n_sz; + pdb->f_len = req->src_len; + + return 0; +} + +static int set_rsa_priv_f1_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map modulus memory\n"); + return -ENOMEM; + } + + pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->d_dma)) { + dev_err(dev, "Unable to map RSA private exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz; + + return 0; +} + +static int caam_rsa_enc(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->e)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PUB_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Encrypt Protocol Data Block */ + ret = set_rsa_pub_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_pub_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static int caam_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->d)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #1 */ + ret = set_rsa_priv_f1_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f1_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static void caam_rsa_free_key(struct caam_rsa_key *key) +{ + kzfree(key->d); + kfree(key->e); + kfree(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; + key->d_sz = 0; + key->e_sz = 0; + key->n_sz = 0; +} + +/** + * caam_read_raw_data - Read a raw byte stream as a positive integer. + * The function skips buffer's leading zeros, copies the remained data + * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns + * the address of the new buffer. + * + * @buf : The data to read + * @nbytes: The amount of data to read + */ +static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) +{ + u8 *val; + + while (!*buf && *nbytes) { + buf++; + (*nbytes)--; + } + + val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL); + if (!val) + return NULL; + + memcpy(val, buf, *nbytes); + + return val; +} + +static int caam_rsa_check_key_length(unsigned int len) +{ + if (len > 4096) + return -EINVAL; + return 0; +} + +static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->d = kzalloc(raw_key.d_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->d) + goto err; + + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->d_sz = raw_key.d_sz; + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->d, raw_key.d, raw_key.d_sz); + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; + +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + return (key->n) ? key->n_sz : -EINVAL; +} + +/* Per session pkc's driver context creation function */ +static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->dev = caam_jr_alloc(); + + if (IS_ERR(ctx->dev)) { + dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + return PTR_ERR(ctx->dev); + } + + return 0; +} + +/* Per session pkc's driver context cleanup function */ +static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + caam_rsa_free_key(key); + caam_jr_free(ctx->dev); +} + +static struct akcipher_alg caam_rsa = { + .encrypt = caam_rsa_enc, + .decrypt = caam_rsa_dec, + .sign = caam_rsa_dec, + .verify = caam_rsa_enc, + .set_pub_key = caam_rsa_set_pub_key, + .set_priv_key = caam_rsa_set_priv_key, + .max_size = caam_rsa_max_size, + .init = caam_rsa_init_tfm, + .exit = caam_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-caam", + .cra_priority = 3000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct caam_rsa_ctx), + }, +}; + +/* Public Key Cryptography module initialization handler */ +static int __init caam_pkc_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + u32 cha_inst, pk_inst; + int err; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); + if (!dev_node) + return -ENODEV; + } + + pdev = of_find_device_by_node(dev_node); + if (!pdev) { + of_node_put(dev_node); + return -ENODEV; + } + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + + /* + * If priv is NULL, it's probably because the caam driver wasn't + * properly initialized (e.g. RNG4 init failed). Thus, bail out here. + */ + if (!priv) + return -ENODEV; + + /* Determine public key hardware accelerator presence. */ + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + pk_inst = (cha_inst & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + + /* Do not register algorithms if PKHA is not present. */ + if (!pk_inst) + return -ENODEV; + + err = crypto_register_akcipher(&caam_rsa); + if (err) + dev_warn(ctrldev, "%s alg registration failed\n", + caam_rsa.base.cra_driver_name); + else + dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n"); + + return err; +} + +static void __exit caam_pkc_exit(void) +{ + crypto_unregister_akcipher(&caam_rsa); +} + +module_init(caam_pkc_init); +module_exit(caam_pkc_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL CAAM support for PKC functions of crypto API"); +MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h new file mode 100644 index 0000000000000..f595d159b112d --- /dev/null +++ b/drivers/crypto/caam/caampkc.h @@ -0,0 +1,70 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ + +#ifndef _PKC_DESC_H_ +#define _PKC_DESC_H_ +#include "compat.h" +#include "pdb.h" + +/** + * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone. + * @n : RSA modulus raw byte stream + * @e : RSA public exponent raw byte stream + * @d : RSA private exponent raw byte stream + * @n_sz : length in bytes of RSA modulus n + * @e_sz : length in bytes of RSA public exponent + * @d_sz : length in bytes of RSA private exponent + */ +struct caam_rsa_key { + u8 *n; + u8 *e; + u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; +}; + +/** + * caam_rsa_ctx - per session context. + * @key : RSA key in DMA zone + * @dev : device structure + */ +struct caam_rsa_ctx { + struct caam_rsa_key key; + struct device *dev; +}; + +/** + * rsa_edesc - s/w-extended rsa descriptor + * @src_nents : number of segments in input scatterlist + * @dst_nents : number of segments in output scatterlist + * @sec4_sg_bytes : length of h/w link table + * @sec4_sg_dma : dma address of h/w link table + * @sec4_sg : pointer to h/w link table + * @pdb : specific RSA Protocol Data Block (PDB) + * @hw_desc : descriptor followed by link tables if any + */ +struct rsa_edesc { + int src_nents; + int dst_nents; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; + union { + struct rsa_pub_pdb pub; + struct rsa_priv_f1_pdb priv_f1; + } pdb; + u32 hw_desc[]; +}; + +/* Descriptor construction primitives. */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); + +#endif diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index b6955ecdfb3f6..7149cd2492e06 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -35,8 +35,11 @@ #include #include #include +#include #include #include #include +#include +#include #endif /* !defined(CAAM_COMPAT_H) */ diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index d8d5584b600b4..26427c11ad878 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -453,6 +453,8 @@ struct sec4_sg_entry { #define OP_PCLID_PUBLICKEYPAIR (0x14 << OP_PCLID_SHIFT) #define OP_PCLID_DSASIGN (0x15 << OP_PCLID_SHIFT) #define OP_PCLID_DSAVERIFY (0x16 << OP_PCLID_SHIFT) +#define OP_PCLID_RSAENC_PUBKEY (0x18 << OP_PCLID_SHIFT) +#define OP_PCLID_RSADEC_PRVKEY (0x19 << OP_PCLID_SHIFT) /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */ #define OP_PCLID_IPSEC (0x01 << OP_PCLID_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index ae3aef6e9feeb..d3869b95e7b1a 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -77,6 +77,13 @@ static inline void init_job_desc(u32 *desc, u32 options) init_desc(desc, CMD_DESC_HDR | options); } +static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +{ + u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + + init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); +} + static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index d383573d3dd45..aaa00dd1c6017 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -1,12 +1,13 @@ /* * CAAM Protocol Data Block (PDB) definition header file * - * Copyright 2008-2012 Freescale Semiconductor, Inc. + * Copyright 2008-2016 Freescale Semiconductor, Inc. * */ #ifndef CAAM_PDB_H #define CAAM_PDB_H +#include "compat.h" /* * PDB- IPSec ESP Header Modification Options @@ -476,4 +477,52 @@ struct dsa_verify_pdb { u8 *ab; /* only used if ECC processing */ }; +/* RSA Protocol Data Block */ +#define RSA_PDB_SGF_SHIFT 28 +#define RSA_PDB_E_SHIFT 12 +#define RSA_PDB_E_MASK (0xFFF << RSA_PDB_E_SHIFT) +#define RSA_PDB_D_SHIFT 12 +#define RSA_PDB_D_MASK (0xFFF << RSA_PDB_D_SHIFT) + +#define RSA_PDB_SGF_F (0x8 << RSA_PDB_SGF_SHIFT) +#define RSA_PDB_SGF_G (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_F (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_G (0x8 << RSA_PDB_SGF_SHIFT) + +#define RSA_PRIV_KEY_FRM_1 0 + +/** + * RSA Encrypt Protocol Data Block + * @sgf: scatter-gather field + * @f_dma: dma address of input data + * @g_dma: dma address of encrypted output data + * @n_dma: dma address of RSA modulus + * @e_dma: dma address of RSA public exponent + * @f_len: length in octets of the input data + */ +struct rsa_pub_pdb { + u32 sgf; + dma_addr_t f_dma; + dma_addr_t g_dma; + dma_addr_t n_dma; + dma_addr_t e_dma; + u32 f_len; +} __packed; + +/** + * RSA Decrypt PDB - Private Key Form #1 + * @sgf: scatter-gather field + * @g_dma: dma address of encrypted input data + * @f_dma: dma address of output data + * @n_dma: dma address of RSA modulus + * @d_dma: dma address of RSA private exponent + */ +struct rsa_priv_f1_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t n_dma; + dma_addr_t d_dma; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c new file mode 100644 index 0000000000000..4e4183e615eaa --- /dev/null +++ b/drivers/crypto/caam/pkc_desc.c @@ -0,0 +1,36 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "caampkc.h" +#include "desc_constr.h" + +/* Descriptor for RSA Public operation */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->e_dma); + append_cmd(desc, pdb->f_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSAENC_PUBKEY); +} + +/* Descriptor for RSA Private operation - Private Key Form #1 */ +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->d_dma); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_1); +} From 8d63a6d53b2afaf7b48e89a03bb94c3246f33204 Mon Sep 17 00:00:00 2001 From: Amitoj Kaur Chawla Date: Mon, 4 Jul 2016 18:30:21 +0530 Subject: [PATCH 138/180] crypto: bfin_crc - Simplify use of devm_ioremap_resource Remove unneeded error handling on the result of a call to platform_get_resource when the value is passed to devm_ioremap_resource. The Coccinelle semantic patch that makes this change is as follows: // @@ expression pdev,res,n,e,e1; expression ret != 0; identifier l; @@ - res = platform_get_resource(pdev, IORESOURCE_MEM, n); ... when != res - if (res == NULL) { ... \(goto l;\|return ret;\) } ... when != res + res = platform_get_resource(pdev, IORESOURCE_MEM, n); e = devm_ioremap_resource(e1, res); // Signed-off-by: Amitoj Kaur Chawla Signed-off-by: Herbert Xu --- drivers/crypto/bfin_crc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 95b73968cf72f..10db7df366c8a 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -588,11 +588,6 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); - return -ENOENT; - } - crc->regs = devm_ioremap_resource(dev, res); if (IS_ERR((void *)crc->regs)) { dev_err(&pdev->dev, "Cannot map CRC IO\n"); From e24860f2a6b50eb09f3a627e168433005798f8d9 Mon Sep 17 00:00:00 2001 From: Conor McLoughlin Date: Mon, 4 Jul 2016 16:26:00 +0100 Subject: [PATCH 139/180] crypto: qat - Use alternative reset methods depending on the specific device Different product families will use FLR or SBR. Virtual Function devices have no reset method. Signed-off-by: Conor McLoughlin Signed-off-by: Herbert Xu --- .../crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 1 + .../crypto/qat/qat_c62x/adf_c62x_hw_data.c | 1 + .../crypto/qat/qat_common/adf_accel_devices.h | 1 + drivers/crypto/qat/qat_common/adf_aer.c | 46 +++++++++++++++---- .../crypto/qat/qat_common/adf_common_drv.h | 2 + .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 1 + 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index c5bd5a9abc4d2..6bc68bc00d76c 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -229,6 +229,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 879e04cae714c..618cec360b39c 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -239,6 +239,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 5a07208ce7782..e8822536530b9 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -176,6 +176,7 @@ struct adf_hw_device_data { void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*enable_ints)(struct adf_accel_dev *accel_dev); int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); + void (*reset_device)(struct adf_accel_dev *accel_dev); const char *fw_name; const char *fw_mmp_name; uint32_t fuses; diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index 7bfb57f1bcb52..2839fccdd84be 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -82,18 +82,12 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; -void adf_dev_restore(struct adf_accel_dev *accel_dev) +void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); struct pci_dev *parent = pdev->bus->self; uint16_t bridge_ctl = 0; - if (accel_dev->is_vf) - return; - - dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", - accel_dev->accel_id); - if (!parent) parent = pdev; @@ -101,6 +95,8 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) dev_info(&GET_DEV(accel_dev), "Transaction still in progress. Proceeding\n"); + dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n"); + pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); @@ -108,8 +104,40 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); msleep(100); - pci_restore_state(pdev); - pci_save_state(pdev); +} +EXPORT_SYMBOL_GPL(adf_reset_sbr); + +void adf_reset_flr(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + u16 control = 0; + int pos = 0; + + dev_info(&GET_DEV(accel_dev), "Function level reset\n"); + pos = pci_pcie_cap(pdev); + if (!pos) { + dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); + return; + } + pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control); + control |= PCI_EXP_DEVCTL_BCR_FLR; + pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control); + msleep(100); +} +EXPORT_SYMBOL_GPL(adf_reset_flr); + +void adf_dev_restore(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + + if (hw_device->reset_device) { + dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", + accel_dev->accel_id); + hw_device->reset_device(accel_dev); + pci_restore_state(pdev); + pci_save_state(pdev); + } } static void adf_device_reset_worker(struct work_struct *work) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 75faa39bc8d04..980e074750120 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -141,6 +141,8 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); +void adf_reset_sbr(struct adf_accel_dev *accel_dev); +void adf_reset_flr(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6e1d5e1855262..1dfcab317bede 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -252,6 +252,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_sbr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } From 8be0b84e58a9b07c314f920792926c5c5a53d3da Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 17:21:38 +0100 Subject: [PATCH 140/180] crypto: rsa - Store rest of the private key components When parsing a private key, store all non-optional fields. These are required for enabling CRT mode for decrypt and verify Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/rsa_helper.c | 75 +++++++++++++++++++++++++++++++++++ crypto/rsaprivkey.asn1 | 10 ++--- include/crypto/internal/rsa.h | 20 ++++++++++ 3 files changed, 100 insertions(+), 5 deletions(-) diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index 583656af4fe28..4df6451e75436 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -78,6 +78,81 @@ int rsa_get_d(void *context, size_t hdrlen, unsigned char tag, return 0; } +int rsa_get_p(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->p = value; + key->p_sz = vlen; + + return 0; +} + +int rsa_get_q(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->q = value; + key->q_sz = vlen; + + return 0; +} + +int rsa_get_dp(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->dp = value; + key->dp_sz = vlen; + + return 0; +} + +int rsa_get_dq(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->dq = value; + key->dq_sz = vlen; + + return 0; +} + +int rsa_get_qinv(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->qinv = value; + key->qinv_sz = vlen; + + return 0; +} + /** * rsa_parse_pub_key() - decodes the BER encoded buffer and stores in the * provided struct rsa_key, pointers to the raw key as is, diff --git a/crypto/rsaprivkey.asn1 b/crypto/rsaprivkey.asn1 index 731aea5edb0cb..4ce06758e8af7 100644 --- a/crypto/rsaprivkey.asn1 +++ b/crypto/rsaprivkey.asn1 @@ -3,9 +3,9 @@ RsaPrivKey ::= SEQUENCE { n INTEGER ({ rsa_get_n }), e INTEGER ({ rsa_get_e }), d INTEGER ({ rsa_get_d }), - prime1 INTEGER, - prime2 INTEGER, - exponent1 INTEGER, - exponent2 INTEGER, - coefficient INTEGER + prime1 INTEGER ({ rsa_get_p }), + prime2 INTEGER ({ rsa_get_q }), + exponent1 INTEGER ({ rsa_get_dp }), + exponent2 INTEGER ({ rsa_get_dq }), + coefficient INTEGER ({ rsa_get_qinv }) } diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h index d6c042a2ee52a..9e8f1590de980 100644 --- a/include/crypto/internal/rsa.h +++ b/include/crypto/internal/rsa.h @@ -19,17 +19,37 @@ * @n : RSA modulus raw byte stream * @e : RSA public exponent raw byte stream * @d : RSA private exponent raw byte stream + * @p : RSA prime factor p of n raw byte stream + * @q : RSA prime factor q of n raw byte stream + * @dp : RSA exponent d mod (p - 1) raw byte stream + * @dq : RSA exponent d mod (q - 1) raw byte stream + * @qinv : RSA CRT coefficient q^(-1) mod p raw byte stream * @n_sz : length in bytes of RSA modulus n * @e_sz : length in bytes of RSA public exponent * @d_sz : length in bytes of RSA private exponent + * @p_sz : length in bytes of p field + * @q_sz : length in bytes of q field + * @dp_sz : length in bytes of dp field + * @dq_sz : length in bytes of dq field + * @qinv_sz : length in bytes of qinv field */ struct rsa_key { const u8 *n; const u8 *e; const u8 *d; + const u8 *p; + const u8 *q; + const u8 *dp; + const u8 *dq; + const u8 *qinv; size_t n_sz; size_t e_sz; size_t d_sz; + size_t p_sz; + size_t q_sz; + size_t dp_sz; + size_t dq_sz; + size_t qinv_sz; }; int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, From c8afbc84042980ca6978ea4b140f848e2820b96a Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 17:21:39 +0100 Subject: [PATCH 141/180] crypto: testmgr - Add 4K private key to RSA testvector Key generated with openssl. It also contains all fields required for testing CRT mode Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/testmgr.h | 200 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 1 deletion(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 4ce2d866919d7..acb6bbff781a2 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -152,7 +152,7 @@ static char zeroed_string[48]; #ifdef CONFIG_CRYPTO_FIPS #define RSA_TEST_VECTORS 2 #else -#define RSA_TEST_VECTORS 4 +#define RSA_TEST_VECTORS 5 #endif static struct akcipher_testvec rsa_tv_template[] = { { @@ -338,6 +338,204 @@ static struct akcipher_testvec rsa_tv_template[] = { .m_size = 8, .c_size = 256, .public_key_vec = true, + }, { + .key = + "\x30\x82\x09\x29" /* sequence of 2345 bytes */ + "\x02\x01\x00" /* version integer of 1 byte */ + "\x02\x82\x02\x01" /* modulus - integer of 513 bytes */ + "\x00\xC3\x8B\x55\x7B\x73\x4D\xFF\xE9\x9B\xC6\xDC\x67\x3C\xB4\x8E" + "\xA0\x86\xED\xF2\xB9\x50\x5C\x54\x5C\xBA\xE4\xA1\xB2\xA7\xAE\x2F" + "\x1B\x7D\xF1\xFB\xAC\x79\xC5\xDF\x1A\x00\xC9\xB2\xC1\x61\x25\x33" + "\xE6\x9C\xE9\xCF\xD6\x27\xC4\x4E\x44\x30\x44\x5E\x08\xA1\x87\x52" + "\xCC\x6B\x97\x70\x8C\xBC\xA5\x06\x31\x0C\xD4\x2F\xD5\x7D\x26\x24" + "\xA2\xE2\xAC\x78\xF4\x53\x14\xCE\xF7\x19\x2E\xD7\xF7\xE6\x0C\xB9" + "\x56\x7F\x0B\xF1\xB1\xE2\x43\x70\xBD\x86\x1D\xA1\xCC\x2B\x19\x08" + "\x76\xEF\x91\xAC\xBF\x20\x24\x0D\x38\xC0\x89\xB8\x9A\x70\xB3\x64" + "\xD9\x8F\x80\x41\x10\x5B\x9F\xB1\xCB\x76\x43\x00\x21\x25\x36\xD4" + "\x19\xFC\x55\x95\x10\xE4\x26\x74\x98\x2C\xD9\xBD\x0B\x2B\x04\xC2" + "\xAC\x82\x38\xB4\xDD\x4C\x04\x7E\x51\x36\x40\x1E\x0B\xC4\x7C\x25" + "\xDD\x4B\xB2\xE7\x20\x0A\x57\xF9\xB4\x94\xC3\x08\x33\x22\x6F\x8B" + "\x48\xDB\x03\x68\x5A\x5B\xBA\xAE\xF3\xAD\xCF\xC3\x6D\xBA\xF1\x28" + "\x67\x7E\x6C\x79\x07\xDE\xFC\xED\xE7\x96\xE3\x6C\xE0\x2C\x87\xF8" + "\x02\x01\x28\x38\x43\x21\x53\x84\x69\x75\x78\x15\x7E\xEE\xD2\x1B" + "\xB9\x23\x40\xA8\x86\x1E\x38\x83\xB2\x73\x1D\x53\xFB\x9E\x2A\x8A" + "\xB2\x75\x35\x01\xC3\xC3\xC4\x94\xE8\x84\x86\x64\x81\xF4\x42\xAA" + "\x3C\x0E\xD6\x4F\xBC\x0A\x09\x2D\xE7\x1B\xD4\x10\xA8\x54\xEA\x89" + "\x84\x8A\xCB\xF7\x5A\x3C\xCA\x76\x08\x29\x62\xB4\x6A\x22\xDF\x14" + "\x95\x71\xFD\xB6\x86\x39\xB8\x8B\xF8\x91\x7F\x38\xAA\x14\xCD\xE5" + "\xF5\x1D\xC2\x6D\x53\x69\x52\x84\x7F\xA3\x1A\x5E\x26\x04\x83\x06" + "\x73\x52\x56\xCF\x76\x26\xC9\xDD\x75\xD7\xFC\xF4\x69\xD8\x7B\x55" + "\xB7\x68\x13\x53\xB9\xE7\x89\xC3\xE8\xD6\x6E\xA7\x6D\xEA\x81\xFD" + "\xC4\xB7\x05\x5A\xB7\x41\x0A\x23\x8E\x03\x8A\x1C\xAE\xD3\x1E\xCE" + "\xE3\x5E\xFC\x19\x4A\xEE\x61\x9B\x8E\xE5\xE5\xDD\x85\xF9\x41\xEC" + "\x14\x53\x92\xF7\xDD\x06\x85\x02\x91\xE3\xEB\x6C\x43\x03\xB1\x36" + "\x7B\x89\x5A\xA8\xEB\xFC\xD5\xA8\x35\xDC\x81\xD9\x5C\xBD\xCA\xDC" + "\x9B\x98\x0B\x06\x5D\x0C\x5B\xEE\xF3\xD5\xCC\x57\xC9\x71\x2F\x90" + "\x3B\x3C\xF0\x8E\x4E\x35\x48\xAE\x63\x74\xA9\xFC\x72\x75\x8E\x34" + "\xA8\xF2\x1F\xEA\xDF\x3A\x37\x2D\xE5\x39\x39\xF8\x57\x58\x3C\x04" + "\xFE\x87\x06\x98\xBC\x7B\xD3\x21\x36\x60\x25\x54\xA7\x3D\xFA\x91" + "\xCC\xA8\x0B\x92\x8E\xB4\xF7\x06\xFF\x1E\x95\xCB\x07\x76\x97\x3B" + "\x9D" + "\x02\x03\x01\x00\x01" /* public key integer of 3 bytes */ + "\x02\x82\x02\x00" /* private key integer of 512 bytes */ + "\x74\xA9\xE0\x6A\x32\xB4\xCA\x85\xD9\x86\x9F\x60\x88\x7B\x40\xCC" + "\xCD\x33\x91\xA8\xB6\x25\x1F\xBF\xE3\x51\x1C\x97\xB6\x2A\xD9\xB8" + "\x11\x40\x19\xE3\x21\x13\xC8\xB3\x7E\xDC\xD7\x65\x40\x4C\x2D\xD6" + "\xDC\xAF\x32\x6C\x96\x75\x2C\x2C\xCA\x8F\x3F\x7A\xEE\xC4\x09\xC6" + "\x24\x3A\xC9\xCF\x6D\x8D\x17\x50\x94\x52\xD3\xE7\x0F\x2F\x7E\x94" + "\x1F\xA0\xBE\xD9\x25\xE8\x38\x42\x7C\x27\xD2\x79\xF8\x2A\x87\x38" + "\xEF\xBB\x74\x8B\xA8\x6E\x8C\x08\xC6\xC7\x4F\x0C\xBC\x79\xC6\xEF" + "\x0E\xA7\x5E\xE4\xF8\x8C\x09\xC7\x5E\x37\xCC\x87\x77\xCD\xCF\xD1" + "\x6D\x28\x1B\xA9\x62\xC0\xB8\x16\xA7\x8B\xF9\xBB\xCC\xB4\x15\x7F" + "\x1B\x69\x03\xF2\x7B\xEB\xE5\x8C\x14\xD6\x23\x4F\x52\x6F\x18\xA6" + "\x4B\x5B\x01\xAD\x35\xF9\x48\x53\xB3\x86\x35\x66\xD7\xE7\x29\xC0" + "\x09\xB5\xC6\xE6\xFA\xC4\xDA\x19\xBE\xD7\x4D\x41\x14\xBE\x6F\xDF" + "\x1B\xAB\xC0\xCA\x88\x07\xAC\xF1\x7D\x35\x83\x67\x28\x2D\x50\xE9" + "\xCE\x27\x71\x5E\x1C\xCF\xD2\x30\x65\x79\x72\x2F\x9C\xE1\xD2\x39" + "\x7F\xEF\x3B\x01\xF2\x14\x1D\xDF\xBD\x51\xD3\xA1\x53\x62\xCF\x5F" + "\x79\x84\xCE\x06\x96\x69\x29\x49\x82\x1C\x71\x4A\xA1\x66\xC8\x2F" + "\xFD\x7B\x96\x7B\xFC\xC4\x26\x58\xC4\xFC\x7C\xAF\xB5\xE8\x95\x83" + "\x87\xCB\x46\xDE\x97\xA7\xB3\xA2\x54\x5B\xD7\xAF\xAB\xEB\xC8\xF3" + "\x55\x9D\x48\x2B\x30\x9C\xDC\x26\x4B\xC2\x89\x45\x13\xB2\x01\x9A" + "\xA4\x65\xC3\xEC\x24\x2D\x26\x97\xEB\x80\x8A\x9D\x03\xBC\x59\x66" + "\x9E\xE2\xBB\xBB\x63\x19\x64\x93\x11\x7B\x25\x65\x30\xCD\x5B\x4B" + "\x2C\xFF\xDC\x2D\x30\x87\x1F\x3C\x88\x07\xD0\xFC\x48\xCC\x05\x8A" + "\xA2\xC8\x39\x3E\xD5\x51\xBC\x0A\xBE\x6D\xA8\xA0\xF6\x88\x06\x79" + "\x13\xFF\x1B\x45\xDA\x54\xC9\x24\x25\x8A\x75\x0A\x26\xD1\x69\x81" + "\x14\x14\xD1\x79\x7D\x8E\x76\xF2\xE0\xEB\xDD\x0F\xDE\xC2\xEC\x80" + "\xD7\xDC\x16\x99\x92\xBE\xCB\x40\x0C\xCE\x7C\x3B\x46\xA2\x5B\x5D" + "\x0C\x45\xEB\xE1\x00\xDE\x72\x50\xB1\xA6\x0B\x76\xC5\x8D\xFC\x82" + "\x38\x6D\x99\x14\x1D\x1A\x4A\xD3\x7C\x53\xB8\x12\x46\xA2\x30\x38" + "\x82\xF4\x96\x6E\x8C\xCE\x47\x0D\xAF\x0A\x3B\x45\xB7\x43\x95\x43" + "\x9E\x02\x2C\x44\x07\x6D\x1F\x3C\x66\x89\x09\xB6\x1F\x06\x30\xCC" + "\xAD\xCE\x7D\x9A\xDE\x3E\xFB\x6C\xE4\x58\x43\xD2\x4F\xA5\x9E\x5E" + "\xA7\x7B\xAE\x3A\xF6\x7E\xD9\xDB\xD3\xF5\xC5\x41\xAF\xE6\x9C\x91" + "\x02\x82\x01\x01" /* prime1 - integer of 257 bytes */ + "\x00\xE0\xA6\x6C\xF0\xA2\xF8\x81\x85\x36\x43\xD0\x13\x0B\x33\x8B" + "\x8F\x78\x3D\xAC\xC7\x5E\x46\x6A\x7F\x05\xAE\x3E\x26\x0A\xA6\xD0" + "\x51\xF3\xC8\x61\xF5\x77\x22\x48\x10\x87\x4C\xD5\xA4\xD5\xAE\x2D" + "\x4E\x7A\xFE\x1C\x31\xE7\x6B\xFF\xA4\x69\x20\xF9\x2A\x0B\x99\xBE" + "\x7C\x32\x68\xAD\xB0\xC6\x94\x81\x41\x75\xDC\x06\x78\x0A\xB4\xCF" + "\xCD\x1B\x2D\x31\xE4\x7B\xEA\xA8\x35\x99\x75\x57\xC6\x0E\xF6\x78" + "\x4F\xA0\x92\x4A\x00\x1B\xE7\x96\xF2\x5B\xFD\x2C\x0A\x0A\x13\x81" + "\xAF\xCB\x59\x87\x31\xD9\x83\x65\xF2\x22\x48\xD0\x03\x67\x39\xF6" + "\xFF\xA8\x36\x07\x3A\x68\xE3\x7B\xA9\x64\xFD\x9C\xF7\xB1\x3D\xBF" + "\x26\x5C\xCC\x7A\xFC\xA2\x8F\x51\xD1\xE1\xE2\x3C\xEC\x06\x75\x7C" + "\x34\xF9\xA9\x33\x70\x11\xAD\x5A\xDC\x5F\xCF\x50\xF6\x23\x2F\x39" + "\xAC\x92\x48\x53\x4D\x01\x96\x3C\xD8\xDC\x1F\x23\x23\x78\x80\x34" + "\x54\x14\x76\x8B\xB6\xBB\xFB\x88\x78\x31\x59\x28\xD2\xB1\x75\x17" + "\x88\x04\x4A\x78\x62\x18\x2E\xF5\xFB\x9B\xEF\x15\xD8\x16\x47\xC6" + "\x42\xB1\x02\xDA\x9E\xE3\x84\x90\xB4\x2D\xC3\xCE\x13\xC9\x12\x7D" + "\x3E\xCD\x39\x39\xC9\xAD\xA1\x1A\xE6\xD5\xAD\x5A\x09\x4D\x1B\x0C" + "\xAB" + "\x02\x82\x01\x01" /* prime 2 - integer of 257 bytes */ + "\x00\xDE\xD5\x1B\xF6\xCD\x83\xB1\xC6\x47\x7E\xB9\xC0\x6B\xA9\xB8" + "\x02\xF3\xAE\x40\x5D\xFC\xD3\xE5\x4E\xF1\xE3\x39\x04\x52\x84\x89" + "\x40\x37\xBB\xC2\xCD\x7F\x71\x77\x17\xDF\x6A\x4C\x31\x24\x7F\xB9" + "\x7E\x7F\xC8\x43\x4A\x3C\xEB\x8D\x1B\x7F\x21\x51\x67\x45\x8F\xA0" + "\x36\x29\x3A\x18\x45\xA5\x32\xEC\x74\x88\x3C\x98\x5D\x67\x3B\xD7" + "\x51\x1F\xE9\xAE\x09\x01\xDE\xDE\x7C\xFB\x60\xD1\xA5\x6C\xE9\x6A" + "\x93\x04\x02\x3A\xBB\x67\x02\xB9\xFD\x23\xF0\x02\x2B\x49\x85\xC9" + "\x5B\xE7\x4B\xDF\xA3\xF4\xEE\x59\x4C\x45\xEF\x8B\xC1\x6B\xDE\xDE" + "\xBC\x1A\xFC\xD2\x76\x3F\x33\x74\xA9\x8E\xA3\x7E\x0C\xC6\xCE\x70" + "\xA1\x5B\xA6\x77\xEA\x76\xEB\x18\xCE\xB9\xD7\x78\x8D\xAE\x06\xBB" + "\xD3\x1F\x16\x0D\x05\xAB\x4F\xC6\x52\xC8\x6B\x36\x51\x7D\x1D\x27" + "\xAF\x88\x9A\x6F\xCC\x25\x2E\x74\x06\x72\xCE\x9E\xDB\xE0\x9D\x30" + "\xEF\x55\xA5\x58\x21\xA7\x42\x12\x2C\x2C\x23\x87\xC1\x0F\xE8\x51" + "\xDA\x53\xDA\xFC\x05\x36\xDF\x08\x0E\x08\x36\xBE\x5C\x86\x9E\xCA" + "\x68\x90\x33\x12\x0B\x14\x82\xAB\x90\x1A\xD4\x49\x32\x9C\xBD\xAA" + "\xAB\x4E\x38\xF1\xEE\xED\x3D\x3F\xE8\xBD\x48\x56\xA6\x64\xEE\xC8" + "\xD7" + "\x02\x82\x01\x01" /* exponent 1 - integer of 257 bytes */ + "\x00\x96\x5E\x6F\x8F\x06\xD6\xE6\x03\x1F\x96\x76\x81\x38\xBF\x30" + "\xCC\x40\x84\xAF\xD0\xE7\x06\xA5\x24\x0E\xCE\x59\xA5\x26\xFE\x0F" + "\x74\xBB\x83\xC6\x26\x02\xAF\x3C\xA3\x6B\x9C\xFF\x68\x0C\xEB\x40" + "\x42\x46\xCB\x2E\x5E\x2C\xF4\x3A\x32\x77\x77\xED\xAF\xBA\x02\x17" + "\xE1\x93\xF0\x43\x4A\x8F\x31\x39\xEF\x72\x0F\x6B\x79\x10\x59\x84" + "\xBA\x5A\x55\x7F\x0E\xDB\xEE\xEE\xD6\xA9\xB8\x44\x9F\x3A\xC6\xB9" + "\x33\x3B\x5C\x90\x11\xD0\x9B\xCC\x8A\xBF\x0E\x10\x5B\x4B\xF1\x50" + "\x9E\x35\xB3\xE0\x6D\x7A\x95\x9C\x38\x5D\xC0\x75\x13\xC2\x15\xA7" + "\x81\xEA\xBA\xF7\x4D\x9E\x85\x9D\xF1\x7D\xBA\xD0\x45\x6F\x2A\xD0" + "\x76\xC2\x28\xD0\xAD\xA7\xB5\xDC\xE3\x6A\x99\xFF\x83\x50\xB3\x75" + "\x07\x14\x91\xAF\xEF\x74\xB5\x9F\x9A\xE0\xBA\xA9\x0B\x87\xF3\x85" + "\x5C\x40\xB2\x0E\xA7\xFD\xC6\xED\x45\x8E\xD9\x7C\xB0\xB2\x68\xC6" + "\x1D\xFD\x70\x78\x06\x41\x7F\x95\x12\x36\x9D\xE2\x58\x5D\x15\xEE" + "\x41\x49\xF5\xFA\xEC\x56\x19\xA0\xE6\xE0\xB2\x40\xE1\xD9\xD0\x03" + "\x22\x02\xCF\xD1\x3C\x07\x38\x65\x8F\x65\x0E\xAA\x32\xCE\x25\x05" + "\x16\x73\x51\xB9\x9F\x88\x0B\xCD\x30\xF3\x97\xCC\x2B\x6B\xA4\x0E" + "\x6F" + "\x02\x82\x01\x00" /* exponent 2 - integer of 256 bytes */ + "\x2A\x5F\x3F\xB8\x08\x90\x58\x47\xA9\xE4\xB1\x11\xA3\xE7\x5B\xF4" + "\x43\xBE\x08\xC3\x56\x86\x3C\x7E\x6C\x84\x96\x9C\xF9\xCB\xF6\x05" + "\x5E\x13\xB8\x11\x37\x80\xAD\xF2\xBE\x2B\x0A\x5D\xF5\xE0\xCB\xB7" + "\x00\x39\x66\x82\x41\x5F\x51\x2F\xBF\x56\xE8\x91\xC8\xAA\x6C\xFE" + "\x9F\x8C\x4A\x7D\x43\xD2\x91\x1F\xFF\x9F\xF6\x21\x1C\xB6\x46\x55" + "\x48\xCA\x38\xAB\xC1\xCD\x4D\x65\x5A\xAF\xA8\x6D\xDA\x6D\xF0\x34" + "\x10\x79\x14\x0D\xFA\xA2\x8C\x17\x54\xB4\x18\xD5\x7E\x5F\x90\x50" + "\x87\x84\xE7\xFB\xD7\x61\x53\x5D\xAB\x96\xC7\x6E\x7A\x42\xA0\xFC" + "\x07\xED\xB7\x5F\x80\xD9\x19\xFF\xFB\xFD\x9E\xC4\x73\x31\x62\x3D" + "\x6C\x9E\x15\x03\x62\xA5\x85\xCC\x19\x8E\x9D\x7F\xE3\x6D\xA8\x5D" + "\x96\xF5\xAC\x78\x3D\x81\x27\xE7\x29\xF1\x29\x1D\x09\xBB\x77\x86" + "\x6B\x65\x62\x88\xE1\x31\x1A\x22\xF7\xC5\xCE\x73\x65\x1C\xBE\xE7" + "\x63\xD3\xD3\x14\x63\x27\xAF\x28\xF3\x23\xB6\x76\xC1\xBD\x9D\x82" + "\xF4\x9B\x19\x7D\x2C\x57\xF0\xC2\x2A\x51\xAE\x95\x0D\x8C\x38\x54" + "\xF5\xC6\xA0\x51\xB7\x0E\xB9\xEC\xE7\x0D\x22\xF6\x1A\xD3\xFE\x16" + "\x21\x03\xB7\x0D\x85\xD3\x35\xC9\xDD\xE4\x59\x85\xBE\x7F\xA1\x75" + "\x02\x82\x01\x01" /* coefficient - integer of 257 bytes */ + "\x00\xB9\x48\xD2\x54\x2F\x19\x54\x64\xAE\x62\x80\x61\x89\x80\xB4" + "\x48\x0B\x8D\x7E\x1B\x0F\x50\x08\x82\x3F\xED\x75\x84\xB7\x13\xE4" + "\xF8\x8D\xA8\xBB\x54\x21\x4C\x5A\x54\x07\x16\x4B\xB4\xA4\x9E\x30" + "\xBF\x7A\x30\x1B\x39\x60\xA3\x21\x53\xFB\xB0\xDC\x0F\x7C\x2C\xFB" + "\xAA\x95\x7D\x51\x39\x28\x33\x1F\x25\x31\x53\xF5\xD2\x64\x2B\xF2" + "\x1E\xB3\xC0\x6A\x0B\xC9\xA4\x42\x64\x5C\xFB\x15\xA3\xE8\x4C\x3A" + "\x9C\x3C\xBE\xA3\x39\x83\x23\xE3\x6D\x18\xCC\xC2\xDC\x63\x8D\xBA" + "\x98\xE0\xE0\x31\x4A\x2B\x37\x9C\x4D\x6B\xF3\x9F\x51\xE4\x43\x5C" + "\x83\x5F\xBF\x5C\xFE\x92\x45\x01\xAF\xF5\xC2\xF4\xB7\x56\x93\xA5" + "\xF4\xAA\x67\x3C\x48\x37\xBD\x9A\x3C\xFE\xA5\x9A\xB0\xD1\x6B\x85" + "\xDD\x81\xD4\xFA\xAD\x31\x83\xA8\x22\x9B\xFD\xB4\x61\xDC\x7A\x51" + "\x59\x62\x10\x1B\x7E\x44\xA3\xFE\x90\x51\x5A\x3E\x02\x87\xAD\xFA" + "\xDD\x0B\x1F\x3D\x35\xAF\xEE\x13\x85\x51\xA7\x42\xC0\xEE\x9E\x20" + "\xE9\xD0\x29\xB2\xE4\x21\xE4\x6D\x62\xB9\xF4\x48\x4A\xD8\x46\x8E" + "\x61\xA6\x2C\x5D\xDF\x8F\x97\x2B\x3A\x75\x1D\x83\x17\x6F\xC6\xB0" + "\xDE\xFC\x14\x25\x06\x5A\x60\xBB\xB8\x21\x89\xD1\xEF\x57\xF1\x71" + "\x3D", + .m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a", + .c = + "\x5c\xce\x9c\xd7\x9a\x9e\xa1\xfe\x7a\x82\x3c\x68\x27\x98\xe3\x5d" + "\xd5\xd7\x07\x29\xf5\xfb\xc3\x1a\x7f\x63\x1e\x62\x31\x3b\x19\x87" + "\x79\x4f\xec\x7b\xf3\xcb\xea\x9b\x95\x52\x3a\x40\xe5\x87\x7b\x72" + "\xd1\x72\xc9\xfb\x54\x63\xd8\xc9\xd7\x2c\xfc\x7b\xc3\x14\x1e\xbc" + "\x18\xb4\x34\xa1\xbf\x14\xb1\x37\x31\x6e\xf0\x1b\x35\x19\x54\x07" + "\xf7\x99\xec\x3e\x63\xe2\xcd\x61\x28\x65\xc3\xcd\xb1\x38\x36\xa5" + "\xb2\xd7\xb0\xdc\x1f\xf5\xef\x19\xc7\x53\x32\x2d\x1c\x26\xda\xe4" + "\x0d\xd6\x90\x7e\x28\xd8\xdc\xe4\x61\x05\xd2\x25\x90\x01\xd3\x96" + "\x6d\xa6\xcf\x58\x20\xbb\x03\xf4\x01\xbc\x79\xb9\x18\xd8\xb8\xba" + "\xbd\x93\xfc\xf2\x62\x5d\x8c\x66\x1e\x0e\x84\x59\x93\xdd\xe2\x93" + "\xa2\x62\x7d\x08\x82\x7a\xdd\xfc\xb8\xbc\xc5\x4f\x9c\x4e\xbf\xb4" + "\xfc\xf4\xc5\x01\xe8\x00\x70\x4d\x28\x26\xcc\x2e\xfe\x0e\x58\x41" + "\x8b\xec\xaf\x7c\x4b\x54\xd0\xa0\x64\xf9\x32\xf4\x2e\x47\x65\x0a" + "\x67\x88\x39\x3a\xdb\xb2\xdb\x7b\xb5\xf6\x17\xa8\xd9\xc6\x5e\x28" + "\x13\x82\x8a\x99\xdb\x60\x08\xa5\x23\x37\xfa\x88\x90\x31\xc8\x9d" + "\x8f\xec\xfb\x85\x9f\xb1\xce\xa6\x24\x50\x46\x44\x47\xcb\x65\xd1" + "\xdf\xc0\xb1\x6c\x90\x1f\x99\x8e\x4d\xd5\x9e\x31\x07\x66\x87\xdf" + "\x01\xaa\x56\x3c\x71\xe0\x2b\x6f\x67\x3b\x23\xed\xc2\xbd\x03\x30" + "\x79\x76\x02\x10\x10\x98\x85\x8a\xff\xfd\x0b\xda\xa5\xd9\x32\x48" + "\x02\xa0\x0b\xb9\x2a\x8a\x18\xca\xc6\x8f\x3f\xbb\x16\xb2\xaa\x98" + "\x27\xe3\x60\x43\xed\x15\x70\xd4\x57\x15\xfe\x19\xd4\x9b\x13\x78" + "\x8a\xf7\x21\xf1\xa2\xa2\x2d\xb3\x09\xcf\x44\x91\x6e\x08\x3a\x30" + "\x81\x3e\x90\x93\x8a\x67\x33\x00\x59\x54\x9a\x25\xd3\x49\x8e\x9f" + "\xc1\x4b\xe5\x86\xf3\x50\x4c\xbc\xc5\xd3\xf5\x3a\x54\xe1\x36\x3f" + "\xe2\x5a\xb4\x37\xc0\xeb\x70\x35\xec\xf6\xb7\xe8\x44\x3b\x7b\xf3" + "\xf1\xf2\x1e\xdb\x60\x7d\xd5\xbe\xf0\x71\x34\x90\x4c\xcb\xd4\x35" + "\x51\xc7\xdd\xd8\xc9\x81\xf5\x5d\x57\x46\x2c\xb1\x7b\x9b\xaa\xcb" + "\xd1\x22\x25\x49\x44\xa3\xd4\x6b\x29\x7b\xd8\xb2\x07\x93\xbf\x3d" + "\x52\x49\x84\x79\xef\xb8\xe5\xc4\xad\xca\xa8\xc6\xf6\xa6\x76\x70" + "\x5b\x0b\xe5\x83\xc6\x0e\xef\x55\xf2\xe7\xff\x04\xea\xe6\x13\xbe" + "\x40\xe1\x40\x45\x48\x66\x75\x31\xae\x35\x64\x91\x11\x6f\xda\xee" + "\x26\x86\x45\x6f\x0b\xd5\x9f\x03\xb1\x65\x5b\xdb\xa4\xe4\xf9\x45", + .key_len = 2349, + .m_size = 8, + .c_size = 512, } }; From 879f77e9071f029e1c9bd5a75814ecf51370f846 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 17:21:40 +0100 Subject: [PATCH 142/180] crypto: qat - Add RSA CRT mode Extend qat driver to use RSA CRT mode when all CRT related components are present in the private key. Simplify code in qat_rsa_setkey by adding qat_rsa_clear_ctx. Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 234 ++++++++++++++++-- 1 file changed, 209 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 04b0ef8cfaa19..eaff02a3b1ac4 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -73,6 +73,14 @@ struct qat_rsa_input_params { dma_addr_t d; dma_addr_t n; } dec; + struct { + dma_addr_t c; + dma_addr_t p; + dma_addr_t q; + dma_addr_t dp; + dma_addr_t dq; + dma_addr_t qinv; + } dec_crt; u64 in_tab[8]; }; } __packed __aligned(64); @@ -93,10 +101,21 @@ struct qat_rsa_ctx { char *n; char *e; char *d; + char *p; + char *q; + char *dp; + char *dq; + char *qinv; dma_addr_t dma_n; dma_addr_t dma_e; dma_addr_t dma_d; + dma_addr_t dma_p; + dma_addr_t dma_q; + dma_addr_t dma_dp; + dma_addr_t dma_dq; + dma_addr_t dma_qinv; unsigned int key_sz; + bool crt_mode; struct qat_crypto_instance *inst; } __packed __aligned(64); @@ -235,6 +254,35 @@ static unsigned long qat_rsa_dec_fn_id(unsigned int len) }; } +#define PKE_RSA_DP2_512 0x1c131b57 +#define PKE_RSA_DP2_1024 0x26131c2d +#define PKE_RSA_DP2_1536 0x45111d12 +#define PKE_RSA_DP2_2048 0x59121dfa +#define PKE_RSA_DP2_3072 0x81121ed9 +#define PKE_RSA_DP2_4096 0xb1111fb2 + +static unsigned long qat_rsa_dec_fn_id_crt(unsigned int len) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 512: + return PKE_RSA_DP2_512; + case 1024: + return PKE_RSA_DP2_1024; + case 1536: + return PKE_RSA_DP2_1536; + case 2048: + return PKE_RSA_DP2_2048; + case 3072: + return PKE_RSA_DP2_3072; + case 4096: + return PKE_RSA_DP2_4096; + default: + return 0; + }; +} + static int qat_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -388,7 +436,9 @@ static int qat_rsa_dec(struct akcipher_request *req) memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); - msg->pke_hdr.cd_pars.func_id = qat_rsa_dec_fn_id(ctx->key_sz); + msg->pke_hdr.cd_pars.func_id = ctx->crt_mode ? + qat_rsa_dec_fn_id_crt(ctx->key_sz) : + qat_rsa_dec_fn_id(ctx->key_sz); if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; @@ -398,8 +448,16 @@ static int qat_rsa_dec(struct akcipher_request *req) ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + if (ctx->crt_mode) { + qat_req->in.dec_crt.p = ctx->dma_p; + qat_req->in.dec_crt.q = ctx->dma_q; + qat_req->in.dec_crt.dp = ctx->dma_dp; + qat_req->in.dec_crt.dq = ctx->dma_dq; + qat_req->in.dec_crt.qinv = ctx->dma_qinv; + } else { + qat_req->in.dec.d = ctx->dma_d; + qat_req->in.dec.n = ctx->dma_n; + } ret = -ENOMEM; /* @@ -446,7 +504,10 @@ static int qat_rsa_dec(struct akcipher_request *req) } - qat_req->in.in_tab[3] = 0; + if (ctx->crt_mode) + qat_req->in.in_tab[6] = 0; + else + qat_req->in.in_tab[3] = 0; qat_req->out.out_tab[1] = 0; qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, sizeof(struct qat_rsa_input_params), @@ -463,7 +524,11 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; msg->pke_mid.opaque = (uint64_t)(__force long)req; - msg->input_param_count = 3; + if (ctx->crt_mode) + msg->input_param_count = 6; + else + msg->input_param_count = 3; + msg->output_param_count = 1; do { ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); @@ -583,13 +648,106 @@ int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) return ret; } -static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, - unsigned int keylen, bool private) +static void qat_rsa_drop_leading_zeros(const char **ptr, unsigned int *len) { - struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct device *dev = &GET_DEV(ctx->inst->accel_dev); - struct rsa_key rsa_key; - int ret; + while (!**ptr && *len) { + (*ptr)++; + (*len)--; + } +} + +static void qat_rsa_setkey_crt(struct qat_rsa_ctx *ctx, struct rsa_key *rsa_key) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + const char *ptr; + unsigned int len; + unsigned int half_key_sz = ctx->key_sz / 2; + + /* p */ + ptr = rsa_key->p; + len = rsa_key->p_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto err; + ctx->p = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + goto err; + memcpy(ctx->p + (half_key_sz - len), ptr, len); + + /* q */ + ptr = rsa_key->q; + len = rsa_key->q_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_p; + ctx->q = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_q, GFP_KERNEL); + if (!ctx->q) + goto free_p; + memcpy(ctx->q + (half_key_sz - len), ptr, len); + + /* dp */ + ptr = rsa_key->dp; + len = rsa_key->dp_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_q; + ctx->dp = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dp, + GFP_KERNEL); + if (!ctx->dp) + goto free_q; + memcpy(ctx->dp + (half_key_sz - len), ptr, len); + + /* dq */ + ptr = rsa_key->dq; + len = rsa_key->dq_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dp; + ctx->dq = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dq, + GFP_KERNEL); + if (!ctx->dq) + goto free_dp; + memcpy(ctx->dq + (half_key_sz - len), ptr, len); + + /* qinv */ + ptr = rsa_key->qinv; + len = rsa_key->qinv_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dq; + ctx->qinv = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_qinv, + GFP_KERNEL); + if (!ctx->qinv) + goto free_dq; + memcpy(ctx->qinv + (half_key_sz - len), ptr, len); + + ctx->crt_mode = true; + return; + +free_dq: + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + ctx->dq = NULL; +free_dp: + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + ctx->dp = NULL; +free_q: + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + ctx->q = NULL; +free_p: + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + ctx->p = NULL; +err: + ctx->crt_mode = false; +} + +static void qat_rsa_clear_ctx(struct device *dev, struct qat_rsa_ctx *ctx) +{ + unsigned int half_key_sz = ctx->key_sz / 2; /* Free the old key if any */ if (ctx->n) @@ -600,10 +758,48 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, memset(ctx->d, '\0', ctx->key_sz); dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); } + if (ctx->p) { + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + } + if (ctx->q) { + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + } + if (ctx->dp) { + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + } + if (ctx->dq) { + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + } + if (ctx->qinv) { + memset(ctx->qinv, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->qinv, ctx->dma_qinv); + } ctx->n = NULL; ctx->e = NULL; ctx->d = NULL; + ctx->p = NULL; + ctx->q = NULL; + ctx->dp = NULL; + ctx->dq = NULL; + ctx->qinv = NULL; + ctx->crt_mode = false; + ctx->key_sz = 0; +} + +static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; + int ret; + + qat_rsa_clear_ctx(dev, ctx); if (private) ret = rsa_parse_priv_key(&rsa_key, key, keylen); @@ -622,6 +818,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); if (ret < 0) goto free; + qat_rsa_setkey_crt(ctx, &rsa_key); } if (!ctx->n || !ctx->e) { @@ -637,20 +834,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, return 0; free: - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - ctx->d = NULL; - } - if (ctx->e) { - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - ctx->e = NULL; - } - if (ctx->n) { - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - ctx->n = NULL; - ctx->key_sz = 0; - } + qat_rsa_clear_ctx(dev, ctx); return ret; } From 8ef7cafbccd84d509bbfba63188c8a3e0936e2ee Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 7 Jul 2016 21:58:16 +0900 Subject: [PATCH 143/180] crypto: doc - Fix double words "the the" in crypto-API.tmpl This patch fix double words "the the" in crypto-API.tmpl. Signed-off-by: Masanari Iida Signed-off-by: Herbert Xu --- Documentation/DocBook/crypto-API.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl index d55dc5a39bad9..fb2a1526f6ecd 100644 --- a/Documentation/DocBook/crypto-API.tmpl +++ b/Documentation/DocBook/crypto-API.tmpl @@ -440,8 +440,8 @@ The type flag specifies the type of the cipher algorithm. The caller usually provides a 0 when the caller wants the default handling. Otherwise, the caller may provide the - following selections which match the the aforementioned - cipher types: + following selections which match the aforementioned cipher + types: From c9839143ebbf5e821128da44f7e271d745aab19e Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Thu, 7 Jul 2016 15:27:29 +0100 Subject: [PATCH 144/180] crypto: qat - Add DH support Add DH support under kpp api. Drop struct qat_rsa_request and introduce a more generic struct qat_asym_request and share it between RSA and DH requests. Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/Kconfig | 1 + drivers/crypto/qat/qat_common/qat_asym_algs.c | 593 +++++++++++++++--- 2 files changed, 522 insertions(+), 72 deletions(-) diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 571d04dda415a..ce3cae40f9498 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_QAT select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER + select CRYPTO_DH select CRYPTO_HMAC select CRYPTO_RSA select CRYPTO_SHA1 diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index eaff02a3b1ac4..3d56fb82f48a9 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -49,6 +49,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -119,36 +122,454 @@ struct qat_rsa_ctx { struct qat_crypto_instance *inst; } __packed __aligned(64); -struct qat_rsa_request { - struct qat_rsa_input_params in; - struct qat_rsa_output_params out; +struct qat_dh_input_params { + union { + struct { + dma_addr_t b; + dma_addr_t xa; + dma_addr_t p; + } in; + struct { + dma_addr_t xa; + dma_addr_t p; + } in_g2; + u64 in_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_output_params { + union { + dma_addr_t r; + u64 out_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_ctx { + char *g; + char *xa; + char *p; + dma_addr_t dma_g; + dma_addr_t dma_xa; + dma_addr_t dma_p; + unsigned int p_size; + bool g2; + struct qat_crypto_instance *inst; +} __packed __aligned(64); + +struct qat_asym_request { + union { + struct qat_rsa_input_params rsa; + struct qat_dh_input_params dh; + } in; + union { + struct qat_rsa_output_params rsa; + struct qat_dh_output_params dh; + } out; dma_addr_t phy_in; dma_addr_t phy_out; char *src_align; char *dst_align; struct icp_qat_fw_pke_request req; - struct qat_rsa_ctx *ctx; + union { + struct qat_rsa_ctx *rsa; + struct qat_dh_ctx *dh; + } ctx; + union { + struct akcipher_request *rsa; + struct kpp_request *dh; + } areq; int err; + void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); +static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) +{ + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct kpp_request *areq = req->areq.dh; + struct device *dev = &GET_DEV(req->ctx.dh->inst->accel_dev); + int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( + resp->pke_resp_hdr.comn_resp_flags); + + err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; + + if (areq->src) { + if (req->src_align) + dma_free_coherent(dev, req->ctx.dh->p_size, + req->src_align, req->in.dh.in.b); + else + dma_unmap_single(dev, req->in.dh.in.b, + req->ctx.dh->p_size, DMA_TO_DEVICE); + } + + areq->dst_len = req->ctx.dh->p_size; + if (req->dst_align) { + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); + + dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, + req->out.dh.r); + } else { + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + } + + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + dma_unmap_single(dev, req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + + kpp_request_complete(areq, err); +} + +#define PKE_DH_1536 0x390c1a49 +#define PKE_DH_G2_1536 0x2e0b1a3e +#define PKE_DH_2048 0x4d0c1a60 +#define PKE_DH_G2_2048 0x3e0b1a55 +#define PKE_DH_3072 0x510c1a77 +#define PKE_DH_G2_3072 0x3a0b1a6c +#define PKE_DH_4096 0x690c1a8e +#define PKE_DH_G2_4096 0x4a0b1a83 + +static unsigned long qat_dh_fn_id(unsigned int len, bool g2) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 1536: + return g2 ? PKE_DH_G2_1536 : PKE_DH_1536; + case 2048: + return g2 ? PKE_DH_G2_2048 : PKE_DH_2048; + case 3072: + return g2 ? PKE_DH_G2_3072 : PKE_DH_3072; + case 4096: + return g2 ? PKE_DH_G2_4096 : PKE_DH_4096; + default: + return 0; + }; +} + +static inline struct qat_dh_ctx *qat_dh_get_params(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int qat_dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + struct qat_asym_request *qat_req = + PTR_ALIGN(kpp_request_ctx(req), 64); + struct icp_qat_fw_pke_request *msg = &qat_req->req; + int ret, ctr = 0; + int n_input_params = 0; + + if (unlikely(!ctx->xa)) + return -EINVAL; + + if (req->dst_len < ctx->p_size) { + req->dst_len = ctx->p_size; + return -EOVERFLOW; + } + memset(msg, '\0', sizeof(*msg)); + ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, + ICP_QAT_FW_COMN_REQ_FLAG_SET); + + msg->pke_hdr.cd_pars.func_id = qat_dh_fn_id(ctx->p_size, + !req->src && ctx->g2); + if (unlikely(!msg->pke_hdr.cd_pars.func_id)) + return -EINVAL; + + qat_req->cb = qat_dh_cb; + qat_req->ctx.dh = ctx; + qat_req->areq.dh = req; + msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; + msg->pke_hdr.comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, + QAT_COMN_CD_FLD_TYPE_64BIT_ADR); + + /* + * If no source is provided use g as base + */ + if (req->src) { + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } else { + if (ctx->g2) { + qat_req->in.dh.in_g2.xa = ctx->dma_xa; + qat_req->in.dh.in_g2.p = ctx->dma_p; + n_input_params = 2; + } else { + qat_req->in.dh.in.b = ctx->dma_g; + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } + } + + ret = -ENOMEM; + if (req->src) { + /* + * src can be of any size in valid range, but HW expects it to + * be the same as modulo p so in case it is different we need + * to allocate a new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->src) && req->src_len == ctx->p_size) { + qat_req->src_align = NULL; + qat_req->in.dh.in.b = dma_map_single(dev, + sg_virt(req->src), + req->src_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, + qat_req->in.dh.in.b))) + return ret; + + } else { + int shift = ctx->p_size - req->src_len; + + qat_req->src_align = dma_zalloc_coherent(dev, + ctx->p_size, + &qat_req->in.dh.in.b, + GFP_KERNEL); + if (unlikely(!qat_req->src_align)) + return ret; + + scatterwalk_map_and_copy(qat_req->src_align + shift, + req->src, 0, req->src_len, 0); + } + } + /* + * dst can be of any size in valid range, but HW expects it to be the + * same as modulo m so in case it is different we need to allocate a + * new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { + qat_req->dst_align = NULL; + qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_src; + + } else { + qat_req->dst_align = dma_zalloc_coherent(dev, ctx->p_size, + &qat_req->out.dh.r, + GFP_KERNEL); + if (unlikely(!qat_req->dst_align)) + goto unmap_src; + } + + qat_req->in.dh.in_tab[n_input_params] = 0; + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; + + msg->pke_mid.src_data_addr = qat_req->phy_in; + msg->pke_mid.dest_data_addr = qat_req->phy_out; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + msg->input_param_count = n_input_params; + msg->output_param_count = 1; + + do { + ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); + } while (ret == -EBUSY && ctr++ < 100); + + if (!ret) + return -EINPROGRESS; + + if (!dma_mapping_error(dev, qat_req->phy_out)) + dma_unmap_single(dev, qat_req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); +unmap_in_params: + if (!dma_mapping_error(dev, qat_req->phy_in)) + dma_unmap_single(dev, qat_req->phy_in, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); +unmap_dst: + if (qat_req->dst_align) + dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, + qat_req->out.dh.r); + else + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); +unmap_src: + if (req->src) { + if (qat_req->src_align) + dma_free_coherent(dev, ctx->p_size, qat_req->src_align, + qat_req->in.dh.in.b); + else + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + } + return ret; +} + +static int qat_dh_check_params_length(unsigned int p_len) +{ + switch (p_len) { + case 1536: + case 2048: + case 3072: + case 4096: + return 0; + } + return -EINVAL; +} + +static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (qat_dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p_size = params->p_size; + ctx->p = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + return -ENOMEM; + memcpy(ctx->p, params->p, ctx->p_size); + + /* If g equals 2 don't copy it */ + if (params->g_size == 1 && *(char *)params->g == 0x02) { + ctx->g2 = true; + return 0; + } + + ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); + if (!ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + return -ENOMEM; + } + memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, + params->g_size); + + return 0; +} + +static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) +{ + if (ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); + ctx->g = NULL; + } + if (ctx->xa) { + dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); + ctx->xa = NULL; + } + if (ctx->p) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + } + ctx->p_size = 0; + ctx->g2 = false; +} + +static int qat_dh_set_secret(struct crypto_kpp *tfm, void *buf, + unsigned int len) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct dh params; + int ret; + + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + /* Free old secret if any */ + qat_dh_clear_ctx(dev, ctx); + + ret = qat_dh_set_params(ctx, ¶ms); + if (ret < 0) + return ret; + + ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, + GFP_KERNEL); + if (!ctx->xa) { + qat_dh_clear_ctx(dev, ctx); + return -ENOMEM; + } + memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, + params.key_size); + + return 0; +} + +static int qat_dh_max_size(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + return ctx->p ? ctx->p_size : -EINVAL; +} + +static int qat_dh_init_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = + qat_crypto_get_instance_node(get_current_node()); + + if (!inst) + return -EINVAL; + + ctx->p_size = 0; + ctx->g2 = false; + ctx->inst = inst; + return 0; +} + +static void qat_dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + + qat_dh_clear_ctx(dev, ctx); + qat_crypto_put_instance(ctx->inst); +} + static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) { - struct akcipher_request *areq = (void *)(__force long)resp->opaque; - struct qat_rsa_request *req = PTR_ALIGN(akcipher_request_ctx(areq), 64); - struct device *dev = &GET_DEV(req->ctx->inst->accel_dev); + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct akcipher_request *areq = req->areq.rsa; + struct device *dev = &GET_DEV(req->ctx.rsa->inst->accel_dev); int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( resp->pke_resp_hdr.comn_resp_flags); err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; if (req->src_align) - dma_free_coherent(dev, req->ctx->key_sz, req->src_align, - req->in.enc.m); + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, + req->in.rsa.enc.m); else - dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz, + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, DMA_TO_DEVICE); - areq->dst_len = req->ctx->key_sz; + areq->dst_len = req->ctx.rsa->key_sz; if (req->dst_align) { char *ptr = req->dst_align; @@ -157,14 +578,14 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) ptr++; } - if (areq->dst_len != req->ctx->key_sz) + if (areq->dst_len != req->ctx.rsa->key_sz) memmove(req->dst_align, ptr, areq->dst_len); scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); - dma_free_coherent(dev, req->ctx->key_sz, req->dst_align, - req->out.enc.c); + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, + req->out.rsa.enc.c); } else { char *ptr = sg_virt(areq->dst); @@ -176,7 +597,7 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) if (sg_virt(areq->dst) != ptr && areq->dst_len) memmove(sg_virt(areq->dst), ptr, areq->dst_len); - dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz, + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } @@ -192,8 +613,9 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; + struct qat_asym_request *areq = (void *)(__force long)resp->opaque; - qat_rsa_cb(resp); + areq->cb(resp); } #define PKE_RSA_EP_512 0x1c161b21 @@ -289,7 +711,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -308,14 +730,16 @@ static int qat_rsa_enc(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.enc.e = ctx->dma_e; - qat_req->in.enc.n = ctx->dma_n; + qat_req->in.rsa.enc.e = ctx->dma_e; + qat_req->in.rsa.enc.n = ctx->dma_n; ret = -ENOMEM; /* @@ -327,16 +751,16 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.enc.m = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.enc.m))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.enc.m, + &qat_req->in.rsa.enc.m, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -346,30 +770,30 @@ static int qat_rsa_enc(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); + qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.enc.c))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.enc.c, + &qat_req->out.rsa.enc.c, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -377,7 +801,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; do { @@ -399,19 +823,19 @@ static int qat_rsa_enc(struct akcipher_request *req) unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.enc.c); + qat_req->out.rsa.enc.c); else - if (!dma_mapping_error(dev, qat_req->out.enc.c)) - dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.enc.m); + qat_req->in.rsa.enc.m); else - if (!dma_mapping_error(dev, qat_req->in.enc.m)) - dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -421,7 +845,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -442,21 +866,23 @@ static int qat_rsa_dec(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); if (ctx->crt_mode) { - qat_req->in.dec_crt.p = ctx->dma_p; - qat_req->in.dec_crt.q = ctx->dma_q; - qat_req->in.dec_crt.dp = ctx->dma_dp; - qat_req->in.dec_crt.dq = ctx->dma_dq; - qat_req->in.dec_crt.qinv = ctx->dma_qinv; + qat_req->in.rsa.dec_crt.p = ctx->dma_p; + qat_req->in.rsa.dec_crt.q = ctx->dma_q; + qat_req->in.rsa.dec_crt.dp = ctx->dma_dp; + qat_req->in.rsa.dec_crt.dq = ctx->dma_dq; + qat_req->in.rsa.dec_crt.qinv = ctx->dma_qinv; } else { - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + qat_req->in.rsa.dec.d = ctx->dma_d; + qat_req->in.rsa.dec.n = ctx->dma_n; } ret = -ENOMEM; @@ -469,16 +895,16 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.dec.c = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.dec.c))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.dec.c, + &qat_req->in.rsa.dec.c, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -488,16 +914,16 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.dec.m = dma_map_single(dev, sg_virt(req->dst), + qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), req->dst_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.dec.m))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.dec.m, + &qat_req->out.rsa.dec.m, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; @@ -505,17 +931,17 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (ctx->crt_mode) - qat_req->in.in_tab[6] = 0; + qat_req->in.rsa.in_tab[6] = 0; else - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -523,7 +949,7 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; if (ctx->crt_mode) msg->input_param_count = 6; else @@ -549,19 +975,19 @@ static int qat_rsa_dec(struct akcipher_request *req) unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.dec.m); + qat_req->out.rsa.dec.m); else - if (!dma_mapping_error(dev, qat_req->out.dec.m)) - dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.dec.c); + qat_req->in.rsa.dec.c); else - if (!dma_mapping_error(dev, qat_req->in.dec.c)) - dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -900,7 +1326,7 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_rsa_request) + 64, + .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -910,6 +1336,23 @@ static struct akcipher_alg rsa = { }, }; +static struct kpp_alg dh = { + .set_secret = qat_dh_set_secret, + .generate_public_key = qat_dh_compute_value, + .compute_shared_secret = qat_dh_compute_value, + .max_size = qat_dh_max_size, + .init = qat_dh_init_tfm, + .exit = qat_dh_exit_tfm, + .reqsize = sizeof(struct qat_asym_request) + 64, + .base = { + .cra_name = "dh", + .cra_driver_name = "qat-dh", + .cra_priority = 1000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct qat_dh_ctx), + }, +}; + int qat_asym_algs_register(void) { int ret = 0; @@ -918,7 +1361,11 @@ int qat_asym_algs_register(void) if (++active_devs == 1) { rsa.base.cra_flags = 0; ret = crypto_register_akcipher(&rsa); + if (ret) + goto unlock; + ret = crypto_register_kpp(&dh); } +unlock: mutex_unlock(&algs_lock); return ret; } @@ -926,7 +1373,9 @@ int qat_asym_algs_register(void) void qat_asym_algs_unregister(void) { mutex_lock(&algs_lock); - if (--active_devs == 0) + if (--active_devs == 0) { crypto_unregister_akcipher(&rsa); + crypto_unregister_kpp(&dh); + } mutex_unlock(&algs_lock); } From bd76ad4abfdccd26a9ac11214aa715e83bc8e808 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Thu, 7 Jul 2016 15:52:17 +0100 Subject: [PATCH 145/180] crypto: qat - Stop dropping leading zeros from RSA output There is not need to drop leading zeros from the RSA output operations results. Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 3d56fb82f48a9..0d35dca2e925c 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -571,32 +571,12 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) areq->dst_len = req->ctx.rsa->key_sz; if (req->dst_align) { - char *ptr = req->dst_align; - - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } - - if (areq->dst_len != req->ctx.rsa->key_sz) - memmove(req->dst_align, ptr, areq->dst_len); - scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, req->out.rsa.enc.c); } else { - char *ptr = sg_virt(areq->dst); - - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } - - if (sg_virt(areq->dst) != ptr && areq->dst_len) - memmove(sg_virt(areq->dst), ptr, areq->dst_len); - dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } From eb9bc8e7afaa9f062105dad55ec1c0663d961bb3 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 8 Jul 2016 09:28:03 -0700 Subject: [PATCH 146/180] crypto: sha-mb - Cleanup code to use || instead of | for condition comparison and cleanup multiline comment style In sha*_ctx_mgr_submit, we currently use the | operator instead of || ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) Switching it to || and remove extraneous paranthesis to adhere to coding style. Also cleanup inconsistent multiline comment style. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1-mb/sha1_mb.c | 2 +- arch/x86/crypto/sha256-mb/sha256_mb.c | 11 +++++++---- arch/x86/crypto/sha512-mb/sha512_mb.c | 11 +++++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c index 561b2867b8788..9e5b67127a094 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -304,7 +304,7 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, * Or if the user's buffer contains less than a whole block, * append as much as possible to the extra block. */ - if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) { /* * Compute how many bytes to copy from user buffer into * extra block diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index c9d5dcc81c96a..89fa85e8b10ca 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -283,7 +283,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, ctx->incoming_buffer = buffer; ctx->incoming_buffer_length = len; - /* Store the user's request flags and mark this ctx as currently + /* + * Store the user's request flags and mark this ctx as currently * being processed. */ ctx->status = (flags & HASH_LAST) ? @@ -299,8 +300,9 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, * Or if the user's buffer contains less than a whole block, * append as much as possible to the extra block. */ - if ((ctx->partial_block_buffer_length) | (len < SHA256_BLOCK_SIZE)) { - /* Compute how many bytes to copy from user buffer into + if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) { + /* + * Compute how many bytes to copy from user buffer into * extra block */ uint32_t copy_len = SHA256_BLOCK_SIZE - @@ -323,7 +325,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, /* The extra block should never contain more than 1 block */ assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); - /* If the extra block buffer contains exactly 1 block, + /* + * If the extra block buffer contains exactly 1 block, * it can be hashed. */ if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 676f0f272f2b5..f4cf5b78fd360 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -253,7 +253,8 @@ static struct sha512_hash_ctx int flags) { if (flags & (~HASH_ENTIRE)) { - /* User should not pass anything other than FIRST, UPDATE, or + /* + * User should not pass anything other than FIRST, UPDATE, or * LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; @@ -284,7 +285,8 @@ static struct sha512_hash_ctx ctx->partial_block_buffer_length = 0; } - /* If we made it here, there were no errors during this call to + /* + * If we made it here, there were no errors during this call to * submit */ ctx->error = HASH_CTX_ERROR_NONE; @@ -293,7 +295,8 @@ static struct sha512_hash_ctx ctx->incoming_buffer = buffer; ctx->incoming_buffer_length = len; - /* Store the user's request flags and mark this ctx as currently being + /* + * Store the user's request flags and mark this ctx as currently being * processed. */ ctx->status = (flags & HASH_LAST) ? @@ -309,7 +312,7 @@ static struct sha512_hash_ctx * Or if the user's buffer contains less than a whole block, * append as much as possible to the extra block. */ - if ((ctx->partial_block_buffer_length) | (len < SHA512_BLOCK_SIZE)) { + if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { /* Compute how many bytes to copy from user buffer into extra * block */ From 4e6c3df4d729f85997cbf276bfa8ffd8579b8e77 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:31 +0800 Subject: [PATCH 147/180] crypto: skcipher - Add low-level skcipher interface This patch allows skcipher algorithms and instances to be created and registered with the crypto API. They are accessible through the top-level skcipher interface, along with ablkcipher/blkcipher algorithms and instances. This patch also introduces a new parameter called chunk size which is meant for ciphers such as CTR and CTS which ostensibly can handle arbitrary lengths, but still behave like block ciphers in that you can only process a partial block at the very end. For these ciphers the block size will continue to be set to 1 as it is now while the chunk size will be set to the underlying block size. Signed-off-by: Herbert Xu --- crypto/skcipher.c | 196 +++++++++++++++++++++++++++-- include/crypto/internal/skcipher.h | 87 +++++++++++++ include/crypto/skcipher.h | 130 +++++++++++++++++++ include/linux/crypto.h | 1 + 4 files changed, 407 insertions(+), 7 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 69230e9d4ac99..d248008e7f7bc 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -16,7 +16,11 @@ #include #include +#include #include +#include +#include +#include #include "internal.h" @@ -25,10 +29,11 @@ static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) if (alg->cra_type == &crypto_blkcipher_type) return sizeof(struct crypto_blkcipher *); - BUG_ON(alg->cra_type != &crypto_ablkcipher_type && - alg->cra_type != &crypto_givcipher_type); + if (alg->cra_type == &crypto_ablkcipher_type || + alg->cra_type == &crypto_givcipher_type) + return sizeof(struct crypto_ablkcipher *); - return sizeof(struct crypto_ablkcipher *); + return crypto_alg_extsize(alg); } static int skcipher_setkey_blkcipher(struct crypto_skcipher *tfm, @@ -216,26 +221,118 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm) return 0; } +static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); + + alg->exit(skcipher); +} + static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) { + struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); + if (tfm->__crt_alg->cra_type == &crypto_blkcipher_type) return crypto_init_skcipher_ops_blkcipher(tfm); - BUG_ON(tfm->__crt_alg->cra_type != &crypto_ablkcipher_type && - tfm->__crt_alg->cra_type != &crypto_givcipher_type); + if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type || + tfm->__crt_alg->cra_type == &crypto_givcipher_type) + return crypto_init_skcipher_ops_ablkcipher(tfm); + + skcipher->setkey = alg->setkey; + skcipher->encrypt = alg->encrypt; + skcipher->decrypt = alg->decrypt; + skcipher->ivsize = alg->ivsize; + skcipher->keysize = alg->max_keysize; + + if (alg->exit) + skcipher->base.exit = crypto_skcipher_exit_tfm; - return crypto_init_skcipher_ops_ablkcipher(tfm); + if (alg->init) + return alg->init(skcipher); + + return 0; +} + +static void crypto_skcipher_free_instance(struct crypto_instance *inst) +{ + struct skcipher_instance *skcipher = + container_of(inst, struct skcipher_instance, s.base); + + skcipher->free(skcipher); +} + +static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); +static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) +{ + struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, + base); + + seq_printf(m, "type : skcipher\n"); + seq_printf(m, "async : %s\n", + alg->cra_flags & CRYPTO_ALG_ASYNC ? "yes" : "no"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "min keysize : %u\n", skcipher->min_keysize); + seq_printf(m, "max keysize : %u\n", skcipher->max_keysize); + seq_printf(m, "ivsize : %u\n", skcipher->ivsize); + seq_printf(m, "chunksize : %u\n", skcipher->chunksize); } +#ifdef CONFIG_NET +static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_blkcipher rblkcipher; + struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, + base); + + strncpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type)); + strncpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); + + rblkcipher.blocksize = alg->cra_blocksize; + rblkcipher.min_keysize = skcipher->min_keysize; + rblkcipher.max_keysize = skcipher->max_keysize; + rblkcipher.ivsize = skcipher->ivsize; + + if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, + sizeof(struct crypto_report_blkcipher), &rblkcipher)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + static const struct crypto_type crypto_skcipher_type2 = { .extsize = crypto_skcipher_extsize, .init_tfm = crypto_skcipher_init_tfm, + .free = crypto_skcipher_free_instance, +#ifdef CONFIG_PROC_FS + .show = crypto_skcipher_show, +#endif + .report = crypto_skcipher_report, .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK, - .type = CRYPTO_ALG_TYPE_BLKCIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .tfmsize = offsetof(struct crypto_skcipher, base), }; +int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, + const char *name, u32 type, u32 mask) +{ + spawn->base.frontend = &crypto_skcipher_type2; + return crypto_grab_spawn(&spawn->base, name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_grab_skcipher2); + struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask) { @@ -243,5 +340,90 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, } EXPORT_SYMBOL_GPL(crypto_alloc_skcipher); +int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask) +{ + return crypto_type_has_alg(alg_name, &crypto_skcipher_type2, + type, mask); +} +EXPORT_SYMBOL_GPL(crypto_has_skcipher2); + +static int skcipher_prepare_alg(struct skcipher_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8) + return -EINVAL; + + if (!alg->chunksize) + alg->chunksize = base->cra_blocksize; + + base->cra_type = &crypto_skcipher_type2; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER; + + return 0; +} + +int crypto_register_skcipher(struct skcipher_alg *alg) +{ + struct crypto_alg *base = &alg->base; + int err; + + err = skcipher_prepare_alg(alg); + if (err) + return err; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_skcipher); + +void crypto_unregister_skcipher(struct skcipher_alg *alg) +{ + crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_skcipher); + +int crypto_register_skciphers(struct skcipher_alg *algs, int count) +{ + int i, ret; + + for (i = 0; i < count; i++) { + ret = crypto_register_skcipher(&algs[i]); + if (ret) + goto err; + } + + return 0; + +err: + for (--i; i >= 0; --i) + crypto_unregister_skcipher(&algs[i]); + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_skciphers); + +void crypto_unregister_skciphers(struct skcipher_alg *algs, int count) +{ + int i; + + for (i = count - 1; i >= 0; --i) + crypto_unregister_skcipher(&algs[i]); +} +EXPORT_SYMBOL_GPL(crypto_unregister_skciphers); + +int skcipher_register_instance(struct crypto_template *tmpl, + struct skcipher_instance *inst) +{ + int err; + + err = skcipher_prepare_alg(&inst->alg); + if (err) + return err; + + return crypto_register_instance(tmpl, skcipher_crypto_instance(inst)); +} +EXPORT_SYMBOL_GPL(skcipher_register_instance); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Symmetric key cipher type"); diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 2cf7a61ece59e..ce6619c339fee 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -19,12 +19,46 @@ struct rtattr; +struct skcipher_instance { + void (*free)(struct skcipher_instance *inst); + union { + struct { + char head[offsetof(struct skcipher_alg, base)]; + struct crypto_instance base; + } s; + struct skcipher_alg alg; + }; +}; + struct crypto_skcipher_spawn { struct crypto_spawn base; }; extern const struct crypto_type crypto_givcipher_type; +static inline struct crypto_instance *skcipher_crypto_instance( + struct skcipher_instance *inst) +{ + return &inst->s.base; +} + +static inline struct skcipher_instance *skcipher_alg_instance( + struct crypto_skcipher *skcipher) +{ + return container_of(crypto_skcipher_alg(skcipher), + struct skcipher_instance, alg); +} + +static inline void *skcipher_instance_ctx(struct skcipher_instance *inst) +{ + return crypto_instance_ctx(skcipher_crypto_instance(inst)); +} + +static inline void skcipher_request_complete(struct skcipher_request *req, int err) +{ + req->base.complete(&req->base, err); +} + static inline void crypto_set_skcipher_spawn( struct crypto_skcipher_spawn *spawn, struct crypto_instance *inst) { @@ -33,6 +67,8 @@ static inline void crypto_set_skcipher_spawn( int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, u32 type, u32 mask); +int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, + const char *name, u32 type, u32 mask); struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask); @@ -47,6 +83,12 @@ static inline struct crypto_alg *crypto_skcipher_spawn_alg( return spawn->base.alg; } +static inline struct skcipher_alg *crypto_spawn_skcipher_alg( + struct crypto_skcipher_spawn *spawn) +{ + return container_of(spawn->base.alg, struct skcipher_alg, base); +} + static inline struct crypto_ablkcipher *crypto_spawn_skcipher( struct crypto_skcipher_spawn *spawn) { @@ -55,6 +97,25 @@ static inline struct crypto_ablkcipher *crypto_spawn_skcipher( crypto_skcipher_mask(0))); } +static inline struct crypto_skcipher *crypto_spawn_skcipher2( + struct crypto_skcipher_spawn *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + +static inline void crypto_skcipher_set_reqsize( + struct crypto_skcipher *skcipher, unsigned int reqsize) +{ + skcipher->reqsize = reqsize; +} + +int crypto_register_skcipher(struct skcipher_alg *alg); +void crypto_unregister_skcipher(struct skcipher_alg *alg); +int crypto_register_skciphers(struct skcipher_alg *algs, int count); +void crypto_unregister_skciphers(struct skcipher_alg *algs, int count); +int skcipher_register_instance(struct crypto_template *tmpl, + struct skcipher_instance *inst); + int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req); int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req); const char *crypto_default_geniv(const struct crypto_alg *alg); @@ -122,5 +183,31 @@ static inline u32 skcipher_request_flags(struct skcipher_request *req) return req->base.flags; } +static inline unsigned int crypto_skcipher_alg_min_keysize( + struct skcipher_alg *alg) +{ + if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return alg->base.cra_blkcipher.min_keysize; + + if (alg->base.cra_ablkcipher.encrypt) + return alg->base.cra_ablkcipher.min_keysize; + + return alg->min_keysize; +} + +static inline unsigned int crypto_skcipher_alg_max_keysize( + struct skcipher_alg *alg) +{ + if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return alg->base.cra_blkcipher.max_keysize; + + if (alg->base.cra_ablkcipher.encrypt) + return alg->base.cra_ablkcipher.max_keysize; + + return alg->max_keysize; +} + #endif /* _CRYPTO_INTERNAL_SKCIPHER_H */ diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 0f987f50bb52c..a381f57ea6952 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -65,6 +65,75 @@ struct crypto_skcipher { struct crypto_tfm base; }; +/** + * struct skcipher_alg - symmetric key cipher definition + * @min_keysize: Minimum key size supported by the transformation. This is the + * smallest key length supported by this transformation algorithm. + * This must be set to one of the pre-defined values as this is + * not hardware specific. Possible values for this field can be + * found via git grep "_MIN_KEY_SIZE" include/crypto/ + * @max_keysize: Maximum key size supported by the transformation. This is the + * largest key length supported by this transformation algorithm. + * This must be set to one of the pre-defined values as this is + * not hardware specific. Possible values for this field can be + * found via git grep "_MAX_KEY_SIZE" include/crypto/ + * @setkey: Set key for the transformation. This function is used to either + * program a supplied key into the hardware or store the key in the + * transformation context for programming it later. Note that this + * function does modify the transformation context. This function can + * be called multiple times during the existence of the transformation + * object, so one must make sure the key is properly reprogrammed into + * the hardware. This function is also responsible for checking the key + * length for validity. In case a software fallback was put in place in + * the @cra_init call, this function might need to use the fallback if + * the algorithm doesn't support all of the key sizes. + * @encrypt: Encrypt a scatterlist of blocks. This function is used to encrypt + * the supplied scatterlist containing the blocks of data. The crypto + * API consumer is responsible for aligning the entries of the + * scatterlist properly and making sure the chunks are correctly + * sized. In case a software fallback was put in place in the + * @cra_init call, this function might need to use the fallback if + * the algorithm doesn't support all of the key sizes. In case the + * key was stored in transformation context, the key might need to be + * re-programmed into the hardware in this function. This function + * shall not modify the transformation context, as this function may + * be called in parallel with the same transformation object. + * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt + * and the conditions are exactly the same. + * @init: Initialize the cryptographic transformation object. This function + * is used to initialize the cryptographic transformation object. + * This function is called only once at the instantiation time, right + * after the transformation context was allocated. In case the + * cryptographic hardware has some special requirements which need to + * be handled by software, this function shall check for the precise + * requirement of the transformation and put any software fallbacks + * in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. + * @ivsize: IV size applicable for transformation. The consumer must provide an + * IV of exactly that size to perform the encrypt or decrypt operation. + * @chunksize: Equal to the block size except for stream ciphers such as + * CTR where it is set to the underlying block size. + * + * All fields except @ivsize are mandatory and must be filled. + */ +struct skcipher_alg { + int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct skcipher_request *req); + int (*decrypt)(struct skcipher_request *req); + int (*init)(struct crypto_skcipher *tfm); + void (*exit)(struct crypto_skcipher *tfm); + + unsigned int min_keysize; + unsigned int max_keysize; + unsigned int ivsize; + unsigned int chunksize; + + struct crypto_alg base; +}; + #define SKCIPHER_REQUEST_ON_STACK(name, tfm) \ char __##name##_desc[sizeof(struct skcipher_request) + \ crypto_skcipher_reqsize(tfm)] CRYPTO_MINALIGN_ATTR; \ @@ -231,12 +300,43 @@ static inline int crypto_has_skcipher(const char *alg_name, u32 type, crypto_skcipher_mask(mask)); } +/** + * crypto_has_skcipher2() - Search for the availability of an skcipher. + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * skcipher + * @type: specifies the type of the skcipher + * @mask: specifies the mask for the skcipher + * + * Return: true when the skcipher is known to the kernel crypto API; false + * otherwise + */ +int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask); + static inline const char *crypto_skcipher_driver_name( struct crypto_skcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)); } +static inline struct skcipher_alg *crypto_skcipher_alg( + struct crypto_skcipher *tfm) +{ + return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, + struct skcipher_alg, base); +} + +static inline unsigned int crypto_skcipher_alg_ivsize(struct skcipher_alg *alg) +{ + if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return alg->base.cra_blkcipher.ivsize; + + if (alg->base.cra_ablkcipher.encrypt) + return alg->base.cra_ablkcipher.ivsize; + + return alg->ivsize; +} + /** * crypto_skcipher_ivsize() - obtain IV size * @tfm: cipher handle @@ -251,6 +351,36 @@ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm) return tfm->ivsize; } +static inline unsigned int crypto_skcipher_alg_chunksize( + struct skcipher_alg *alg) +{ + if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return alg->base.cra_blocksize; + + if (alg->base.cra_ablkcipher.encrypt) + return alg->base.cra_blocksize; + + return alg->chunksize; +} + +/** + * crypto_skcipher_chunksize() - obtain chunk size + * @tfm: cipher handle + * + * The block size is set to one for ciphers such as CTR. However, + * you still need to provide incremental updates in multiples of + * the underlying block size as the IV does not have sub-block + * granularity. This is known in this API as the chunk size. + * + * Return: chunk size in bytes + */ +static inline unsigned int crypto_skcipher_chunksize( + struct crypto_skcipher *tfm) +{ + return crypto_skcipher_alg_chunksize(crypto_skcipher_alg(tfm)); +} + /** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 992cfc2e5df1d..37a652d1639d3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -47,6 +47,7 @@ #define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 +#define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_KPP 0x00000008 #define CRYPTO_ALG_TYPE_RNG 0x0000000c From a0129733a3f5eaa973c1ea76848d905b851548f1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:32 +0800 Subject: [PATCH 148/180] crypto: null - Add new default null skcipher Current the default null skcipher is actually a crypto_blkcipher. This patch creates a synchronous crypto_skcipher version of the null cipher which unfortunately has to settle for the name skcipher2. Signed-off-by: Herbert Xu --- crypto/crypto_null.c | 38 ++++++++++++++++++++++++++++++++++++++ include/crypto/null.h | 2 ++ 2 files changed, 40 insertions(+) diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 941c9a434d50f..c3f683910d072 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -28,6 +28,8 @@ static DEFINE_MUTEX(crypto_default_null_skcipher_lock); static struct crypto_blkcipher *crypto_default_null_skcipher; static int crypto_default_null_skcipher_refcnt; +static struct crypto_skcipher *crypto_default_null_skcipher2; +static int crypto_default_null_skcipher2_refcnt; static int null_compress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) @@ -188,6 +190,42 @@ void crypto_put_default_null_skcipher(void) } EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher); +struct crypto_skcipher *crypto_get_default_null_skcipher2(void) +{ + struct crypto_skcipher *tfm; + + mutex_lock(&crypto_default_null_skcipher_lock); + tfm = crypto_default_null_skcipher2; + + if (!tfm) { + tfm = crypto_alloc_skcipher("ecb(cipher_null)", + 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto unlock; + + crypto_default_null_skcipher2 = tfm; + } + + crypto_default_null_skcipher2_refcnt++; + +unlock: + mutex_unlock(&crypto_default_null_skcipher_lock); + + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher2); + +void crypto_put_default_null_skcipher2(void) +{ + mutex_lock(&crypto_default_null_skcipher_lock); + if (!--crypto_default_null_skcipher2_refcnt) { + crypto_free_skcipher(crypto_default_null_skcipher2); + crypto_default_null_skcipher2 = NULL; + } + mutex_unlock(&crypto_default_null_skcipher_lock); +} +EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher2); + static int __init crypto_null_mod_init(void) { int ret = 0; diff --git a/include/crypto/null.h b/include/crypto/null.h index 06dc30d9f56e9..dda87cbb62b2a 100644 --- a/include/crypto/null.h +++ b/include/crypto/null.h @@ -10,5 +10,7 @@ struct crypto_blkcipher *crypto_get_default_null_skcipher(void); void crypto_put_default_null_skcipher(void); +struct crypto_skcipher *crypto_get_default_null_skcipher2(void); +void crypto_put_default_null_skcipher2(void); #endif From 7a530aa9cf3f282b6b123b9296dfce80ed8f068e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:33 +0800 Subject: [PATCH 149/180] crypto: aead - Add chunk size This patch adds a chunk size parameter to aead algorithms, just like the chunk size for skcipher algorithms. However, unlike skcipher we do not currently export this to AEAD users. It is only meant to be used by AEAD implementors for now. Signed-off-by: Herbert Xu --- crypto/aead.c | 6 +++++- include/crypto/aead.h | 12 +++++++----- include/crypto/internal/aead.h | 21 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/crypto/aead.c b/crypto/aead.c index 9b18a1e40d6af..b155cbc3a0dd9 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -346,9 +346,13 @@ static int aead_prepare_alg(struct aead_alg *alg) { struct crypto_alg *base = &alg->base; - if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8) + if (max3(alg->maxauthsize, alg->ivsize, alg->chunksize) > + PAGE_SIZE / 8) return -EINVAL; + if (!alg->chunksize) + alg->chunksize = base->cra_blocksize; + base->cra_type = &crypto_aead_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AEAD; diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 75174f80a1063..12f84327ca366 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -112,11 +112,12 @@ struct aead_request { * supplied during the decryption operation. This function is also * responsible for checking the authentication tag size for * validity. - * @setkey: see struct ablkcipher_alg - * @encrypt: see struct ablkcipher_alg - * @decrypt: see struct ablkcipher_alg - * @geniv: see struct ablkcipher_alg - * @ivsize: see struct ablkcipher_alg + * @setkey: see struct skcipher_alg + * @encrypt: see struct skcipher_alg + * @decrypt: see struct skcipher_alg + * @geniv: see struct skcipher_alg + * @ivsize: see struct skcipher_alg + * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right @@ -145,6 +146,7 @@ struct aead_alg { unsigned int ivsize; unsigned int maxauthsize; + unsigned int chunksize; struct crypto_alg base; }; diff --git a/include/crypto/internal/aead.h b/include/crypto/internal/aead.h index da3864991d4c5..6ad8e31d38682 100644 --- a/include/crypto/internal/aead.h +++ b/include/crypto/internal/aead.h @@ -159,6 +159,27 @@ static inline struct aead_request *aead_get_backlog(struct aead_queue *queue) return req ? container_of(req, struct aead_request, base) : NULL; } +static inline unsigned int crypto_aead_alg_chunksize(struct aead_alg *alg) +{ + return alg->chunksize; +} + +/** + * crypto_aead_chunksize() - obtain chunk size + * @tfm: cipher handle + * + * The block size is set to one for ciphers such as CCM. However, + * you still need to provide incremental updates in multiples of + * the underlying block size as the IV does not have sub-block + * granularity. This is known in this API as the chunk size. + * + * Return: chunk size in bytes + */ +static inline unsigned int crypto_aead_chunksize(struct crypto_aead *tfm) +{ + return crypto_aead_alg_chunksize(crypto_aead_alg(tfm)); +} + int crypto_register_aead(struct aead_alg *alg); void crypto_unregister_aead(struct aead_alg *alg); int crypto_register_aeads(struct aead_alg *algs, int count); From 7217d49f1684b8cd8f6e0e9010efb49f81787cd1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:34 +0800 Subject: [PATCH 150/180] crypto: authenc - Use skcipher This patch converts authenc to use the new skcipher interface as opposed to ablkcipher. It also fixes a little bug where if a sync version of authenc is requested we may still end up using an async ahash. This should have no effect as none of the authenc users can request for a sync authenc. Signed-off-by: Herbert Xu --- crypto/authenc.c | 107 +++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index 309fbc17222d0..a7e1ac786c5db 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -32,8 +32,8 @@ struct authenc_instance_ctx { struct crypto_authenc_ctx { struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; }; struct authenc_request_ctx { @@ -83,7 +83,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, { struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct crypto_ahash *auth = ctx->auth; - struct crypto_ablkcipher *enc = ctx->enc; + struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; @@ -100,11 +100,11 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, if (err) goto out; - crypto_ablkcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(enc, crypto_aead_get_flags(authenc) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(enc, keys.enckey, keys.enckeylen); - crypto_aead_set_flags(authenc, crypto_ablkcipher_get_flags(enc) & + crypto_skcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); + crypto_aead_set_flags(authenc, crypto_skcipher_get_flags(enc) & CRYPTO_TFM_RES_MASK); out: @@ -184,12 +184,15 @@ static int crypto_authenc_copy_assoc(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); - return crypto_blkcipher_encrypt(&desc, req->dst, req->src, - req->assoclen); + skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_callback(skreq, aead_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, + NULL); + + return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_encrypt(struct aead_request *req) @@ -199,10 +202,10 @@ static int crypto_authenc_encrypt(struct aead_request *req) struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_ablkcipher *enc = ctx->enc; + struct crypto_skcipher *enc = ctx->enc; unsigned int cryptlen = req->cryptlen; - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail + - ictx->reqoff); + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ictx->reqoff); struct scatterlist *src, *dst; int err; @@ -217,12 +220,12 @@ static int crypto_authenc_encrypt(struct aead_request *req) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); } - ablkcipher_request_set_tfm(abreq, enc); - ablkcipher_request_set_callback(abreq, aead_request_flags(req), - crypto_authenc_encrypt_done, req); - ablkcipher_request_set_crypt(abreq, src, dst, cryptlen, req->iv); + skcipher_request_set_tfm(skreq, enc); + skcipher_request_set_callback(skreq, aead_request_flags(req), + crypto_authenc_encrypt_done, req); + skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); - err = crypto_ablkcipher_encrypt(abreq); + err = crypto_skcipher_encrypt(skreq); if (err) return err; @@ -238,8 +241,8 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail + - ictx->reqoff); + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ictx->reqoff); unsigned int authsize = crypto_aead_authsize(authenc); u8 *ihash = ahreq->result + authsize; struct scatterlist *src, *dst; @@ -255,13 +258,13 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, if (req->src != req->dst) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); - ablkcipher_request_set_tfm(abreq, ctx->enc); - ablkcipher_request_set_callback(abreq, aead_request_flags(req), - req->base.complete, req->base.data); - ablkcipher_request_set_crypt(abreq, src, dst, - req->cryptlen - authsize, req->iv); + skcipher_request_set_tfm(skreq, ctx->enc); + skcipher_request_set_callback(skreq, aead_request_flags(req), + req->base.complete, req->base.data); + skcipher_request_set_crypt(skreq, src, dst, + req->cryptlen - authsize, req->iv); - return crypto_ablkcipher_decrypt(abreq); + return crypto_skcipher_decrypt(skreq); } static void authenc_verify_ahash_done(struct crypto_async_request *areq, @@ -313,20 +316,20 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher(&ictx->enc); + enc = crypto_spawn_skcipher2(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; - null = crypto_get_default_null_skcipher(); + null = crypto_get_default_null_skcipher2(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; @@ -342,13 +345,13 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) max_t(unsigned int, crypto_ahash_reqsize(auth) + sizeof(struct ahash_request), - sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(enc))); + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: - crypto_free_ablkcipher(enc); + crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; @@ -359,8 +362,8 @@ static void crypto_authenc_exit_tfm(struct crypto_aead *tfm) struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); - crypto_free_ablkcipher(ctx->enc); - crypto_put_default_null_skcipher(); + crypto_free_skcipher(ctx->enc); + crypto_put_default_null_skcipher2(); } static void crypto_authenc_free(struct aead_instance *inst) @@ -379,7 +382,7 @@ static int crypto_authenc_create(struct crypto_template *tmpl, struct aead_instance *inst; struct hash_alg_common *auth; struct crypto_alg *auth_base; - struct crypto_alg *enc; + struct skcipher_alg *enc; struct authenc_instance_ctx *ctx; const char *enc_name; int err; @@ -417,38 +420,40 @@ static int crypto_authenc_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; - enc = crypto_skcipher_spawn_alg(&ctx->enc); + enc = crypto_spawn_skcipher_alg(&ctx->enc); ctx->reqoff = ALIGN(2 * auth->digestsize + auth_base->cra_alignmask, auth_base->cra_alignmask + 1); err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "authenc(%s,%s)", auth_base->cra_name, enc->cra_name) >= + "authenc(%s,%s)", auth_base->cra_name, + enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", auth_base->cra_driver_name, - enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; - inst->alg.base.cra_flags = (auth_base->cra_flags | enc->cra_flags) & - CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = enc->cra_priority * 10 + + inst->alg.base.cra_flags = (auth_base->cra_flags | + enc->base.cra_flags) & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; - inst->alg.base.cra_blocksize = enc->cra_blocksize; + inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = auth_base->cra_alignmask | - enc->cra_alignmask; + enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_ctx); - inst->alg.ivsize = enc->cra_ablkcipher.ivsize; + inst->alg.ivsize = crypto_skcipher_alg_ivsize(enc); + inst->alg.chunksize = crypto_skcipher_alg_chunksize(enc); inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_init_tfm; From e75445a8445215ab4623f180cbc930ecf271181f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:35 +0800 Subject: [PATCH 151/180] crypto: authencesn - Use skcipher This patch converts authencesn to use the new skcipher interface as opposed to ablkcipher. It also fixes a little bug where if a sync version of authencesn is requested we may still end up using an async ahash. This should have no effect as none of the authencesn users can request for a sync authencesn. Signed-off-by: Herbert Xu --- crypto/authencesn.c | 104 +++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 0662b1848c5d8..121010ac99629 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -35,8 +35,8 @@ struct authenc_esn_instance_ctx { struct crypto_authenc_esn_ctx { unsigned int reqoff; struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; }; struct authenc_esn_request_ctx { @@ -65,7 +65,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 * { struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct crypto_ahash *auth = ctx->auth; - struct crypto_ablkcipher *enc = ctx->enc; + struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; @@ -82,11 +82,11 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 * if (err) goto out; - crypto_ablkcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(enc, crypto_aead_get_flags(authenc_esn) & + crypto_skcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc_esn) & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(enc, keys.enckey, keys.enckeylen); - crypto_aead_set_flags(authenc_esn, crypto_ablkcipher_get_flags(enc) & + err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); + crypto_aead_set_flags(authenc_esn, crypto_skcipher_get_flags(enc) & CRYPTO_TFM_RES_MASK); out: @@ -182,11 +182,14 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); - return crypto_blkcipher_encrypt(&desc, req->dst, req->src, len); + skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_callback(skreq, aead_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL); + + return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_esn_encrypt(struct aead_request *req) @@ -194,9 +197,9 @@ static int crypto_authenc_esn_encrypt(struct aead_request *req) struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail - + ctx->reqoff); - struct crypto_ablkcipher *enc = ctx->enc; + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ctx->reqoff); + struct crypto_skcipher *enc = ctx->enc; unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; struct scatterlist *src, *dst; @@ -215,12 +218,12 @@ static int crypto_authenc_esn_encrypt(struct aead_request *req) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, assoclen); } - ablkcipher_request_set_tfm(abreq, enc); - ablkcipher_request_set_callback(abreq, aead_request_flags(req), - crypto_authenc_esn_encrypt_done, req); - ablkcipher_request_set_crypt(abreq, src, dst, cryptlen, req->iv); + skcipher_request_set_tfm(skreq, enc); + skcipher_request_set_callback(skreq, aead_request_flags(req), + crypto_authenc_esn_encrypt_done, req); + skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); - err = crypto_ablkcipher_encrypt(abreq); + err = crypto_skcipher_encrypt(skreq); if (err) return err; @@ -234,8 +237,8 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, unsigned int authsize = crypto_aead_authsize(authenc_esn); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail - + ctx->reqoff); + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ctx->reqoff); struct crypto_ahash *auth = ctx->auth; u8 *ohash = PTR_ALIGN((u8 *)areq_ctx->tail, crypto_ahash_alignmask(auth) + 1); @@ -256,12 +259,12 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen); - ablkcipher_request_set_tfm(abreq, ctx->enc); - ablkcipher_request_set_callback(abreq, flags, - req->base.complete, req->base.data); - ablkcipher_request_set_crypt(abreq, dst, dst, cryptlen, req->iv); + skcipher_request_set_tfm(skreq, ctx->enc); + skcipher_request_set_callback(skreq, flags, + req->base.complete, req->base.data); + skcipher_request_set_crypt(skreq, dst, dst, cryptlen, req->iv); - return crypto_ablkcipher_decrypt(abreq); + return crypto_skcipher_decrypt(skreq); } static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, @@ -331,20 +334,20 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) struct authenc_esn_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher(&ictx->enc); + enc = crypto_spawn_skcipher2(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; - null = crypto_get_default_null_skcipher(); + null = crypto_get_default_null_skcipher2(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; @@ -361,15 +364,15 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) sizeof(struct authenc_esn_request_ctx) + ctx->reqoff + max_t(unsigned int, - crypto_ahash_reqsize(auth) + - sizeof(struct ahash_request), - sizeof(struct skcipher_givcrypt_request) + - crypto_ablkcipher_reqsize(enc))); + crypto_ahash_reqsize(auth) + + sizeof(struct ahash_request), + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: - crypto_free_ablkcipher(enc); + crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; @@ -380,8 +383,8 @@ static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm) struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); - crypto_free_ablkcipher(ctx->enc); - crypto_put_default_null_skcipher(); + crypto_free_skcipher(ctx->enc); + crypto_put_default_null_skcipher2(); } static void crypto_authenc_esn_free(struct aead_instance *inst) @@ -400,7 +403,7 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, struct aead_instance *inst; struct hash_alg_common *auth; struct crypto_alg *auth_base; - struct crypto_alg *enc; + struct skcipher_alg *enc; struct authenc_esn_instance_ctx *ctx; const char *enc_name; int err; @@ -438,35 +441,36 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; - enc = crypto_skcipher_spawn_alg(&ctx->enc); + enc = crypto_spawn_skcipher_alg(&ctx->enc); err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "authencesn(%s,%s)", auth_base->cra_name, - enc->cra_name) >= CRYPTO_MAX_ALG_NAME) + enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authencesn(%s,%s)", auth_base->cra_driver_name, - enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; - inst->alg.base.cra_flags = (auth_base->cra_flags | enc->cra_flags) & - CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = enc->cra_priority * 10 + + inst->alg.base.cra_flags = (auth_base->cra_flags | + enc->base.cra_flags) & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; - inst->alg.base.cra_blocksize = enc->cra_blocksize; + inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = auth_base->cra_alignmask | - enc->cra_alignmask; + enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_esn_ctx); - inst->alg.ivsize = enc->cra_ablkcipher.ivsize; + inst->alg.ivsize = crypto_skcipher_alg_ivsize(enc); + inst->alg.chunksize = crypto_skcipher_alg_chunksize(enc); inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_esn_init_tfm; From b2b39c2f976bc7b69e26dbe4530593d2867544eb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:37 +0800 Subject: [PATCH 152/180] crypto: ctr - Use skcipher in rfc3686 This patch converts rfc3686 to use the new skcipher interface as opposed to ablkcipher. Signed-off-by: Herbert Xu --- crypto/ctr.c | 183 ++++++++++++++++++++++++++------------------------- 1 file changed, 94 insertions(+), 89 deletions(-) diff --git a/crypto/ctr.c b/crypto/ctr.c index 2386f73139520..ff4d21eddb83c 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -26,13 +26,13 @@ struct crypto_ctr_ctx { }; struct crypto_rfc3686_ctx { - struct crypto_ablkcipher *child; + struct crypto_skcipher *child; u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct crypto_rfc3686_req_ctx { u8 iv[CTR_RFC3686_BLOCK_SIZE]; - struct ablkcipher_request subreq CRYPTO_MINALIGN_ATTR; + struct skcipher_request subreq CRYPTO_MINALIGN_ATTR; }; static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key, @@ -249,11 +249,11 @@ static struct crypto_template crypto_ctr_tmpl = { .module = THIS_MODULE, }; -static int crypto_rfc3686_setkey(struct crypto_ablkcipher *parent, +static int crypto_rfc3686_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_rfc3686_ctx *ctx = crypto_ablkcipher_ctx(parent); - struct crypto_ablkcipher *child = ctx->child; + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err; /* the nonce is stored in bytes at end of key */ @@ -265,173 +265,178 @@ static int crypto_rfc3686_setkey(struct crypto_ablkcipher *parent, keylen -= CTR_RFC3686_NONCE_SIZE; - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, keylen); - crypto_ablkcipher_set_flags(parent, crypto_ablkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_rfc3686_crypt(struct ablkcipher_request *req) +static int crypto_rfc3686_crypt(struct skcipher_request *req) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct crypto_rfc3686_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = ctx->child; - unsigned long align = crypto_ablkcipher_alignmask(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; + unsigned long align = crypto_skcipher_alignmask(tfm); struct crypto_rfc3686_req_ctx *rctx = - (void *)PTR_ALIGN((u8 *)ablkcipher_request_ctx(req), align + 1); - struct ablkcipher_request *subreq = &rctx->subreq; + (void *)PTR_ALIGN((u8 *)skcipher_request_ctx(req), align + 1); + struct skcipher_request *subreq = &rctx->subreq; u8 *iv = rctx->iv; /* set up counter block */ memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE); - memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->info, CTR_RFC3686_IV_SIZE); + memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE); /* initialize counter portion of counter block */ *(__be32 *)(iv + CTR_RFC3686_NONCE_SIZE + CTR_RFC3686_IV_SIZE) = cpu_to_be32(1); - ablkcipher_request_set_tfm(subreq, child); - ablkcipher_request_set_callback(subreq, req->base.flags, - req->base.complete, req->base.data); - ablkcipher_request_set_crypt(subreq, req->src, req->dst, req->nbytes, - iv); + skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, iv); - return crypto_ablkcipher_encrypt(subreq); + return crypto_skcipher_encrypt(subreq); } -static int crypto_rfc3686_init_tfm(struct crypto_tfm *tfm) +static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_skcipher_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_rfc3686_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *cipher; + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; unsigned long align; + unsigned int reqsize; - cipher = crypto_spawn_skcipher(spawn); + cipher = crypto_spawn_skcipher2(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; - align = crypto_tfm_alg_alignmask(tfm); + align = crypto_skcipher_alignmask(tfm); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_ablkcipher.reqsize = align + - sizeof(struct crypto_rfc3686_req_ctx) + - crypto_ablkcipher_reqsize(cipher); + reqsize = align + sizeof(struct crypto_rfc3686_req_ctx) + + crypto_skcipher_reqsize(cipher); + crypto_skcipher_set_reqsize(tfm, reqsize); return 0; } -static void crypto_rfc3686_exit_tfm(struct crypto_tfm *tfm) +static void crypto_rfc3686_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_rfc3686_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); +} - crypto_free_ablkcipher(ctx->child); +static void crypto_rfc3686_free(struct skcipher_instance *inst) +{ + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(spawn); + kfree(inst); } -static struct crypto_instance *crypto_rfc3686_alloc(struct rtattr **tb) +static int crypto_rfc3686_create(struct crypto_template *tmpl, + struct rtattr **tb) { struct crypto_attr_type *algt; - struct crypto_instance *inst; - struct crypto_alg *alg; + struct skcipher_instance *inst; + struct skcipher_alg *alg; struct crypto_skcipher_spawn *spawn; const char *cipher_name; int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) - return ERR_CAST(algt); + return PTR_ERR(algt); - if ((algt->type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & algt->mask) - return ERR_PTR(-EINVAL); + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) - return ERR_CAST(cipher_name); + return PTR_ERR(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - spawn = crypto_instance_ctx(inst); + spawn = skcipher_instance_ctx(inst); - crypto_set_skcipher_spawn(spawn, inst); - err = crypto_grab_skcipher(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher2(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; - alg = crypto_skcipher_spawn_alg(spawn); + alg = crypto_spawn_skcipher_alg(spawn); /* We only support 16-byte blocks. */ err = -EINVAL; - if (alg->cra_ablkcipher.ivsize != CTR_RFC3686_BLOCK_SIZE) + if (crypto_skcipher_alg_ivsize(alg) != CTR_RFC3686_BLOCK_SIZE) goto err_drop_spawn; /* Not a stream cipher? */ - if (alg->cra_blocksize != 1) + if (alg->base.cra_blocksize != 1) goto err_drop_spawn; err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", - alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "rfc3686(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_spawn; - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "rfc3686(%s)", alg->cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "rfc3686(%s)", alg->base.cra_driver_name) >= + CRYPTO_MAX_ALG_NAME) goto err_drop_spawn; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = 1; - inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = 1; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - (alg->cra_flags & CRYPTO_ALG_ASYNC); - inst->alg.cra_type = &crypto_ablkcipher_type; + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.cra_ablkcipher.ivsize = CTR_RFC3686_IV_SIZE; - inst->alg.cra_ablkcipher.min_keysize = - alg->cra_ablkcipher.min_keysize + CTR_RFC3686_NONCE_SIZE; - inst->alg.cra_ablkcipher.max_keysize = - alg->cra_ablkcipher.max_keysize + CTR_RFC3686_NONCE_SIZE; + inst->alg.ivsize = CTR_RFC3686_IV_SIZE; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) + + CTR_RFC3686_NONCE_SIZE; + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) + + CTR_RFC3686_NONCE_SIZE; - inst->alg.cra_ablkcipher.geniv = "seqiv"; + inst->alg.setkey = crypto_rfc3686_setkey; + inst->alg.encrypt = crypto_rfc3686_crypt; + inst->alg.decrypt = crypto_rfc3686_crypt; - inst->alg.cra_ablkcipher.setkey = crypto_rfc3686_setkey; - inst->alg.cra_ablkcipher.encrypt = crypto_rfc3686_crypt; - inst->alg.cra_ablkcipher.decrypt = crypto_rfc3686_crypt; + inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc3686_ctx); - inst->alg.cra_ctxsize = sizeof(struct crypto_rfc3686_ctx); + inst->alg.init = crypto_rfc3686_init_tfm; + inst->alg.exit = crypto_rfc3686_exit_tfm; - inst->alg.cra_init = crypto_rfc3686_init_tfm; - inst->alg.cra_exit = crypto_rfc3686_exit_tfm; + inst->free = crypto_rfc3686_free; - return inst; + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; err_drop_spawn: crypto_drop_skcipher(spawn); err_free_inst: kfree(inst); - return ERR_PTR(err); -} - -static void crypto_rfc3686_free(struct crypto_instance *inst) -{ - struct crypto_skcipher_spawn *spawn = crypto_instance_ctx(inst); - - crypto_drop_skcipher(spawn); - kfree(inst); + goto out; } static struct crypto_template crypto_rfc3686_tmpl = { .name = "rfc3686", - .alloc = crypto_rfc3686_alloc, - .free = crypto_rfc3686_free, + .create = crypto_rfc3686_create, .module = THIS_MODULE, }; From 464b93a3c7d16831bca9e02e6c9624e793d7e5cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:38 +0800 Subject: [PATCH 153/180] crypto: ccm - Use skcipher This patch converts ccm to use the new skcipher interface as opposed to ablkcipher. Signed-off-by: Herbert Xu --- crypto/ccm.c | 72 +++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index cc31ea4335bf3..006d8575ef5c7 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -28,7 +28,7 @@ struct ccm_instance_ctx { struct crypto_ccm_ctx { struct crypto_cipher *cipher; - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; }; struct crypto_rfc4309_ctx { @@ -50,7 +50,7 @@ struct crypto_ccm_req_priv_ctx { u32 flags; struct scatterlist src[3]; struct scatterlist dst[3]; - struct ablkcipher_request abreq; + struct skcipher_request skreq; }; static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx( @@ -83,15 +83,15 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); - struct crypto_ablkcipher *ctr = ctx->ctr; + struct crypto_skcipher *ctr = ctx->ctr; struct crypto_cipher *tfm = ctx->cipher; int err = 0; - crypto_ablkcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(ctr, crypto_aead_get_flags(aead) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(ctr, key, keylen); - crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctr) & + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & CRYPTO_TFM_RES_MASK); if (err) goto out; @@ -347,7 +347,7 @@ static int crypto_ccm_encrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->abreq; + struct skcipher_request *skreq = &pctx->skreq; struct scatterlist *dst; unsigned int cryptlen = req->cryptlen; u8 *odata = pctx->odata; @@ -366,11 +366,11 @@ static int crypto_ccm_encrypt(struct aead_request *req) if (req->src != req->dst) dst = pctx->dst; - ablkcipher_request_set_tfm(abreq, ctx->ctr); - ablkcipher_request_set_callback(abreq, pctx->flags, - crypto_ccm_encrypt_done, req); - ablkcipher_request_set_crypt(abreq, pctx->src, dst, cryptlen + 16, iv); - err = crypto_ablkcipher_encrypt(abreq); + skcipher_request_set_tfm(skreq, ctx->ctr); + skcipher_request_set_callback(skreq, pctx->flags, + crypto_ccm_encrypt_done, req); + skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + 16, iv); + err = crypto_skcipher_encrypt(skreq); if (err) return err; @@ -407,7 +407,7 @@ static int crypto_ccm_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->abreq; + struct skcipher_request *skreq = &pctx->skreq; struct scatterlist *dst; unsigned int authsize = crypto_aead_authsize(aead); unsigned int cryptlen = req->cryptlen; @@ -429,11 +429,11 @@ static int crypto_ccm_decrypt(struct aead_request *req) if (req->src != req->dst) dst = pctx->dst; - ablkcipher_request_set_tfm(abreq, ctx->ctr); - ablkcipher_request_set_callback(abreq, pctx->flags, - crypto_ccm_decrypt_done, req); - ablkcipher_request_set_crypt(abreq, pctx->src, dst, cryptlen + 16, iv); - err = crypto_ablkcipher_decrypt(abreq); + skcipher_request_set_tfm(skreq, ctx->ctr); + skcipher_request_set_callback(skreq, pctx->flags, + crypto_ccm_decrypt_done, req); + skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + 16, iv); + err = crypto_skcipher_decrypt(skreq); if (err) return err; @@ -454,7 +454,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) struct ccm_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_cipher *cipher; - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; unsigned long align; int err; @@ -462,7 +462,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctr = crypto_spawn_skcipher(&ictx->ctr); + ctr = crypto_spawn_skcipher2(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_cipher; @@ -475,7 +475,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) crypto_aead_set_reqsize( tfm, align + sizeof(struct crypto_ccm_req_priv_ctx) + - crypto_ablkcipher_reqsize(ctr)); + crypto_skcipher_reqsize(ctr)); return 0; @@ -489,7 +489,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm) struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_cipher(ctx->cipher); - crypto_free_ablkcipher(ctx->ctr); + crypto_free_skcipher(ctx->ctr); } static void crypto_ccm_free(struct aead_instance *inst) @@ -509,7 +509,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, { struct crypto_attr_type *algt; struct aead_instance *inst; - struct crypto_alg *ctr; + struct skcipher_alg *ctr; struct crypto_alg *cipher; struct ccm_instance_ctx *ictx; int err; @@ -544,39 +544,40 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_cipher; - ctr = crypto_skcipher_spawn_alg(&ictx->ctr); + ctr = crypto_spawn_skcipher_alg(&ictx->ctr); /* Not a stream cipher? */ err = -EINVAL; - if (ctr->cra_blocksize != 1) + if (ctr->base.cra_blocksize != 1) goto err_drop_ctr; /* We want the real thing! */ - if (ctr->cra_ablkcipher.ivsize != 16) + if (crypto_skcipher_alg_ivsize(ctr) != 16) goto err_drop_ctr; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "ccm_base(%s,%s)", ctr->cra_driver_name, + "ccm_base(%s,%s)", ctr->base.cra_driver_name, cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_ctr; memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = ctr->cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (cipher->cra_priority + - ctr->cra_priority) / 2; + ctr->base.cra_priority) / 2; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = cipher->cra_alignmask | - ctr->cra_alignmask | + ctr->base.cra_alignmask | (__alignof__(u32) - 1); inst->alg.ivsize = 16; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr); inst->alg.maxauthsize = 16; inst->alg.base.cra_ctxsize = sizeof(struct crypto_ccm_ctx); inst->alg.init = crypto_ccm_init_tfm; @@ -863,6 +864,7 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.ivsize = 8; + inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = 16; inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4309_ctx); From 16f37ecdd068884fc4f4177f83ac77910f4fc113 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:39 +0800 Subject: [PATCH 154/180] crypto: gcm - Use skcipher This patch converts gcm to use the new skcipher interface as opposed to ablkcipher. Signed-off-by: Herbert Xu --- crypto/gcm.c | 111 +++++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 53 deletions(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index d9ea5f9c05741..70a892e87ccb5 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -29,7 +29,7 @@ struct gcm_instance_ctx { }; struct crypto_gcm_ctx { - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; struct crypto_ahash *ghash; }; @@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx { struct crypto_rfc4543_ctx { struct crypto_aead *child; - struct crypto_blkcipher *null; + struct crypto_skcipher *null; u8 nonce[4]; }; @@ -74,7 +74,7 @@ struct crypto_gcm_req_priv_ctx { struct crypto_gcm_ghash_ctx ghash_ctx; union { struct ahash_request ahreq; - struct ablkcipher_request abreq; + struct skcipher_request skreq; } u; }; @@ -114,7 +114,7 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key, { struct crypto_gcm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_ahash *ghash = ctx->ghash; - struct crypto_ablkcipher *ctr = ctx->ctr; + struct crypto_skcipher *ctr = ctx->ctr; struct { be128 hash; u8 iv[8]; @@ -122,35 +122,35 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key, struct crypto_gcm_setkey_result result; struct scatterlist sg[1]; - struct ablkcipher_request req; + struct skcipher_request req; } *data; int err; - crypto_ablkcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(ctr, crypto_aead_get_flags(aead) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(ctr, key, keylen); - crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctr) & + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & CRYPTO_TFM_RES_MASK); if (err) return err; - data = kzalloc(sizeof(*data) + crypto_ablkcipher_reqsize(ctr), + data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr), GFP_KERNEL); if (!data) return -ENOMEM; init_completion(&data->result.completion); sg_init_one(data->sg, &data->hash, sizeof(data->hash)); - ablkcipher_request_set_tfm(&data->req, ctr); - ablkcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_gcm_setkey_done, - &data->result); - ablkcipher_request_set_crypt(&data->req, data->sg, data->sg, - sizeof(data->hash), data->iv); - - err = crypto_ablkcipher_encrypt(&data->req); + skcipher_request_set_tfm(&data->req, ctr); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_gcm_setkey_done, + &data->result); + skcipher_request_set_crypt(&data->req, data->sg, data->sg, + sizeof(data->hash), data->iv); + + err = crypto_skcipher_encrypt(&data->req); if (err == -EINPROGRESS || err == -EBUSY) { err = wait_for_completion_interruptible( &data->result.completion); @@ -223,13 +223,13 @@ static void crypto_gcm_init_crypt(struct aead_request *req, struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_gcm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - struct ablkcipher_request *ablk_req = &pctx->u.abreq; + struct skcipher_request *skreq = &pctx->u.skreq; struct scatterlist *dst; dst = req->src == req->dst ? pctx->src : pctx->dst; - ablkcipher_request_set_tfm(ablk_req, ctx->ctr); - ablkcipher_request_set_crypt(ablk_req, pctx->src, dst, + skcipher_request_set_tfm(skreq, ctx->ctr); + skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + sizeof(pctx->auth_tag), pctx->iv); } @@ -494,14 +494,14 @@ static void gcm_encrypt_done(struct crypto_async_request *areq, int err) static int crypto_gcm_encrypt(struct aead_request *req) { struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->u.abreq; + struct skcipher_request *skreq = &pctx->u.skreq; u32 flags = aead_request_flags(req); crypto_gcm_init_common(req); crypto_gcm_init_crypt(req, req->cryptlen); - ablkcipher_request_set_callback(abreq, flags, gcm_encrypt_done, req); + skcipher_request_set_callback(skreq, flags, gcm_encrypt_done, req); - return crypto_ablkcipher_encrypt(abreq) ?: + return crypto_skcipher_encrypt(skreq) ?: gcm_encrypt_continue(req, flags); } @@ -533,12 +533,12 @@ static void gcm_decrypt_done(struct crypto_async_request *areq, int err) static int gcm_dec_hash_continue(struct aead_request *req, u32 flags) { struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->u.abreq; + struct skcipher_request *skreq = &pctx->u.skreq; struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; crypto_gcm_init_crypt(req, gctx->cryptlen); - ablkcipher_request_set_callback(abreq, flags, gcm_decrypt_done, req); - return crypto_ablkcipher_decrypt(abreq) ?: crypto_gcm_verify(req); + skcipher_request_set_callback(skreq, flags, gcm_decrypt_done, req); + return crypto_skcipher_decrypt(skreq) ?: crypto_gcm_verify(req); } static int crypto_gcm_decrypt(struct aead_request *req) @@ -566,7 +566,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) struct aead_instance *inst = aead_alg_instance(tfm); struct gcm_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_gcm_ctx *ctx = crypto_aead_ctx(tfm); - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; struct crypto_ahash *ghash; unsigned long align; int err; @@ -575,7 +575,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(ghash)) return PTR_ERR(ghash); - ctr = crypto_spawn_skcipher(&ictx->ctr); + ctr = crypto_spawn_skcipher2(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_hash; @@ -587,8 +587,8 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) align &= ~(crypto_tfm_ctx_alignment() - 1); crypto_aead_set_reqsize(tfm, align + offsetof(struct crypto_gcm_req_priv_ctx, u) + - max(sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(ctr), + max(sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(ctr), sizeof(struct ahash_request) + crypto_ahash_reqsize(ghash))); @@ -604,7 +604,7 @@ static void crypto_gcm_exit_tfm(struct crypto_aead *tfm) struct crypto_gcm_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->ghash); - crypto_free_ablkcipher(ctx->ctr); + crypto_free_skcipher(ctx->ctr); } static void crypto_gcm_free(struct aead_instance *inst) @@ -624,7 +624,7 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, { struct crypto_attr_type *algt; struct aead_instance *inst; - struct crypto_alg *ctr; + struct skcipher_alg *ctr; struct crypto_alg *ghash_alg; struct hash_alg_common *ghash; struct gcm_instance_ctx *ctx; @@ -663,41 +663,42 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, goto err_drop_ghash; crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_ghash; - ctr = crypto_skcipher_spawn_alg(&ctx->ctr); + ctr = crypto_spawn_skcipher_alg(&ctx->ctr); /* We only support 16-byte blocks. */ - if (ctr->cra_ablkcipher.ivsize != 16) + if (crypto_skcipher_alg_ivsize(ctr) != 16) goto out_put_ctr; /* Not a stream cipher? */ err = -EINVAL; - if (ctr->cra_blocksize != 1) + if (ctr->base.cra_blocksize != 1) goto out_put_ctr; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "gcm_base(%s,%s)", ctr->cra_driver_name, + "gcm_base(%s,%s)", ctr->base.cra_driver_name, ghash_alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_put_ctr; memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = (ghash->base.cra_flags | ctr->cra_flags) & - CRYPTO_ALG_ASYNC; + inst->alg.base.cra_flags = (ghash->base.cra_flags | + ctr->base.cra_flags) & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (ghash->base.cra_priority + - ctr->cra_priority) / 2; + ctr->base.cra_priority) / 2; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = ghash->base.cra_alignmask | - ctr->cra_alignmask; + ctr->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_gcm_ctx); inst->alg.ivsize = 12; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr); inst->alg.maxauthsize = 16; inst->alg.init = crypto_gcm_init_tfm; inst->alg.exit = crypto_gcm_exit_tfm; @@ -982,6 +983,7 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4106_ctx); inst->alg.ivsize = 8; + inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg); inst->alg.init = crypto_rfc4106_init_tfm; @@ -1086,11 +1088,13 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc) unsigned int authsize = crypto_aead_authsize(aead); unsigned int nbytes = req->assoclen + req->cryptlen - (enc ? 0 : authsize); - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null); + + skcipher_request_set_tfm(nreq, ctx->null); + skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL); - return crypto_blkcipher_encrypt(&desc, req->dst, req->src, nbytes); + return crypto_skcipher_encrypt(nreq); } static int crypto_rfc4543_encrypt(struct aead_request *req) @@ -1110,7 +1114,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm) struct crypto_aead_spawn *spawn = &ictx->aead; struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_aead *aead; - struct crypto_blkcipher *null; + struct crypto_skcipher *null; unsigned long align; int err = 0; @@ -1118,7 +1122,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm) if (IS_ERR(aead)) return PTR_ERR(aead); - null = crypto_get_default_null_skcipher(); + null = crypto_get_default_null_skcipher2(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_aead; @@ -1146,7 +1150,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm) struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); - crypto_put_default_null_skcipher(); + crypto_put_default_null_skcipher2(); } static void crypto_rfc4543_free(struct aead_instance *inst) @@ -1221,6 +1225,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4543_ctx); inst->alg.ivsize = 8; + inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg); inst->alg.init = crypto_rfc4543_init_tfm; From 1e1f006112b50640d7fc11851346f906d223df09 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:40 +0800 Subject: [PATCH 155/180] crypto: chacha20poly1305 - Use skcipher This patch converts chacha20poly1305 to use the new skcipher interface as opposed to ablkcipher. It also fixes a buglet where we may end up with an async poly1305 when the user asks for a async algorithm. This shouldn't be a problem yet as there aren't any async implementations of poly1305 out there. Signed-off-by: Herbert Xu --- crypto/chacha20poly1305.c | 89 ++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 7b6b935cef23e..e899ef51dc8ed 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -31,7 +31,7 @@ struct chachapoly_instance_ctx { }; struct chachapoly_ctx { - struct crypto_ablkcipher *chacha; + struct crypto_skcipher *chacha; struct crypto_ahash *poly; /* key bytes we use for the ChaCha20 IV */ unsigned int saltlen; @@ -53,7 +53,7 @@ struct poly_req { struct chacha_req { u8 iv[CHACHA20_IV_SIZE]; struct scatterlist src[1]; - struct ablkcipher_request req; /* must be last member */ + struct skcipher_request req; /* must be last member */ }; struct chachapoly_req_ctx { @@ -144,12 +144,12 @@ static int chacha_decrypt(struct aead_request *req) dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen); } - ablkcipher_request_set_callback(&creq->req, aead_request_flags(req), - chacha_decrypt_done, req); - ablkcipher_request_set_tfm(&creq->req, ctx->chacha); - ablkcipher_request_set_crypt(&creq->req, src, dst, - rctx->cryptlen, creq->iv); - err = crypto_ablkcipher_decrypt(&creq->req); + skcipher_request_set_callback(&creq->req, aead_request_flags(req), + chacha_decrypt_done, req); + skcipher_request_set_tfm(&creq->req, ctx->chacha); + skcipher_request_set_crypt(&creq->req, src, dst, + rctx->cryptlen, creq->iv); + err = crypto_skcipher_decrypt(&creq->req); if (err) return err; @@ -393,13 +393,13 @@ static int poly_genkey(struct aead_request *req) chacha_iv(creq->iv, req, 0); - ablkcipher_request_set_callback(&creq->req, aead_request_flags(req), - poly_genkey_done, req); - ablkcipher_request_set_tfm(&creq->req, ctx->chacha); - ablkcipher_request_set_crypt(&creq->req, creq->src, creq->src, - POLY1305_KEY_SIZE, creq->iv); + skcipher_request_set_callback(&creq->req, aead_request_flags(req), + poly_genkey_done, req); + skcipher_request_set_tfm(&creq->req, ctx->chacha); + skcipher_request_set_crypt(&creq->req, creq->src, creq->src, + POLY1305_KEY_SIZE, creq->iv); - err = crypto_ablkcipher_decrypt(&creq->req); + err = crypto_skcipher_decrypt(&creq->req); if (err) return err; @@ -433,12 +433,12 @@ static int chacha_encrypt(struct aead_request *req) dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen); } - ablkcipher_request_set_callback(&creq->req, aead_request_flags(req), - chacha_encrypt_done, req); - ablkcipher_request_set_tfm(&creq->req, ctx->chacha); - ablkcipher_request_set_crypt(&creq->req, src, dst, - req->cryptlen, creq->iv); - err = crypto_ablkcipher_encrypt(&creq->req); + skcipher_request_set_callback(&creq->req, aead_request_flags(req), + chacha_encrypt_done, req); + skcipher_request_set_tfm(&creq->req, ctx->chacha); + skcipher_request_set_crypt(&creq->req, src, dst, + req->cryptlen, creq->iv); + err = crypto_skcipher_encrypt(&creq->req); if (err) return err; @@ -500,13 +500,13 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, keylen -= ctx->saltlen; memcpy(ctx->salt, key + keylen, ctx->saltlen); - crypto_ablkcipher_clear_flags(ctx->chacha, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(ctx->chacha, crypto_aead_get_flags(aead) & - CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->chacha, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->chacha, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(ctx->chacha, key, keylen); - crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctx->chacha) & - CRYPTO_TFM_RES_MASK); + err = crypto_skcipher_setkey(ctx->chacha, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctx->chacha) & + CRYPTO_TFM_RES_MASK); return err; } @@ -524,7 +524,7 @@ static int chachapoly_init(struct crypto_aead *tfm) struct aead_instance *inst = aead_alg_instance(tfm); struct chachapoly_instance_ctx *ictx = aead_instance_ctx(inst); struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); - struct crypto_ablkcipher *chacha; + struct crypto_skcipher *chacha; struct crypto_ahash *poly; unsigned long align; @@ -532,7 +532,7 @@ static int chachapoly_init(struct crypto_aead *tfm) if (IS_ERR(poly)) return PTR_ERR(poly); - chacha = crypto_spawn_skcipher(&ictx->chacha); + chacha = crypto_spawn_skcipher2(&ictx->chacha); if (IS_ERR(chacha)) { crypto_free_ahash(poly); return PTR_ERR(chacha); @@ -548,8 +548,8 @@ static int chachapoly_init(struct crypto_aead *tfm) tfm, align + offsetof(struct chachapoly_req_ctx, u) + max(offsetof(struct chacha_req, req) + - sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(chacha), + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(chacha), offsetof(struct poly_req, req) + sizeof(struct ahash_request) + crypto_ahash_reqsize(poly))); @@ -562,7 +562,7 @@ static void chachapoly_exit(struct crypto_aead *tfm) struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->poly); - crypto_free_ablkcipher(ctx->chacha); + crypto_free_skcipher(ctx->chacha); } static void chachapoly_free(struct aead_instance *inst) @@ -579,7 +579,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, { struct crypto_attr_type *algt; struct aead_instance *inst; - struct crypto_alg *chacha; + struct skcipher_alg *chacha; struct crypto_alg *poly; struct hash_alg_common *poly_hash; struct chachapoly_instance_ctx *ctx; @@ -605,7 +605,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, poly = crypto_find_alg(poly_name, &crypto_ahash_type, CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, + algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); @@ -623,20 +625,20 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_poly; - chacha = crypto_skcipher_spawn_alg(&ctx->chacha); + chacha = crypto_spawn_skcipher_alg(&ctx->chacha); err = -EINVAL; /* Need 16-byte IV size, including Initial Block Counter value */ - if (chacha->cra_ablkcipher.ivsize != CHACHA20_IV_SIZE) + if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE) goto out_drop_chacha; /* Not a stream cipher? */ - if (chacha->cra_blocksize != 1) + if (chacha->base.cra_blocksize != 1) goto out_drop_chacha; err = -ENAMETOOLONG; @@ -645,20 +647,21 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, poly_name) >= CRYPTO_MAX_ALG_NAME) goto out_drop_chacha; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha->cra_driver_name, + "%s(%s,%s)", name, chacha->base.cra_driver_name, poly->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_drop_chacha; - inst->alg.base.cra_flags = (chacha->cra_flags | poly->cra_flags) & + inst->alg.base.cra_flags = (chacha->base.cra_flags | poly->cra_flags) & CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = (chacha->cra_priority + + inst->alg.base.cra_priority = (chacha->base.cra_priority + poly->cra_priority) / 2; inst->alg.base.cra_blocksize = 1; - inst->alg.base.cra_alignmask = chacha->cra_alignmask | + inst->alg.base.cra_alignmask = chacha->base.cra_alignmask | poly->cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) + ctx->saltlen; inst->alg.ivsize = ivsize; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(chacha); inst->alg.maxauthsize = POLY1305_DIGEST_SIZE; inst->alg.init = chachapoly_init; inst->alg.exit = chachapoly_exit; From ca0494c093371b1ca3cd4c8e14e1e7a19e6e21b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:42 +0800 Subject: [PATCH 156/180] crypto: aead - Add skcipher null for IV generators This patch adds an skcipher null object alongside the existing null blkcipher so that IV generators using it can switch over to skcipher. Signed-off-by: Herbert Xu --- crypto/aead.c | 10 +++++++++- include/crypto/internal/geniv.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/crypto/aead.c b/crypto/aead.c index b155cbc3a0dd9..a5d9a83f90e4d 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -294,10 +294,15 @@ int aead_init_geniv(struct crypto_aead *aead) if (err) goto out; + ctx->sknull = crypto_get_default_null_skcipher2(); + err = PTR_ERR(ctx->sknull); + if (IS_ERR(ctx->sknull)) + goto out; + ctx->null = crypto_get_default_null_skcipher(); err = PTR_ERR(ctx->null); if (IS_ERR(ctx->null)) - goto out; + goto drop_sknull; child = crypto_spawn_aead(aead_instance_ctx(inst)); err = PTR_ERR(child); @@ -315,6 +320,8 @@ int aead_init_geniv(struct crypto_aead *aead) drop_null: crypto_put_default_null_skcipher(); +drop_sknull: + crypto_put_default_null_skcipher2(); goto out; } EXPORT_SYMBOL_GPL(aead_init_geniv); @@ -325,6 +332,7 @@ void aead_exit_geniv(struct crypto_aead *tfm) crypto_free_aead(ctx->child); crypto_put_default_null_skcipher(); + crypto_put_default_null_skcipher2(); } EXPORT_SYMBOL_GPL(aead_exit_geniv); diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h index 59333635e712d..e8447c9c14e6e 100644 --- a/include/crypto/internal/geniv.h +++ b/include/crypto/internal/geniv.h @@ -21,6 +21,7 @@ struct aead_geniv_ctx { spinlock_t lock; struct crypto_aead *child; struct crypto_blkcipher *null; + struct crypto_skcipher *sknull; u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); }; From 0e8bff47f6d3e863bf1829e020000c249c59ecd2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:43 +0800 Subject: [PATCH 157/180] crypto: echainiv - Use skcipher This patch replaces use of the obsolete blkcipher with skcipher. Signed-off-by: Herbert Xu --- crypto/echainiv.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/crypto/echainiv.c b/crypto/echainiv.c index b96a84560b677..1b01fe98e91f8 100644 --- a/crypto/echainiv.c +++ b/crypto/echainiv.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -112,13 +113,16 @@ static int echainiv_encrypt(struct aead_request *req) info = req->iv; if (req->src != req->dst) { - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); - err = crypto_blkcipher_encrypt( - &desc, req->dst, req->src, - req->assoclen + req->cryptlen); + skcipher_request_set_tfm(nreq, ctx->sknull); + skcipher_request_set_callback(nreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(nreq, req->src, req->dst, + req->assoclen + req->cryptlen, + NULL); + + err = crypto_skcipher_encrypt(nreq); if (err) return err; } From ef22871f20e6da987e32719a4ba5e3a9b7d81ae8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:45 +0800 Subject: [PATCH 158/180] crypto: seqiv - Use skcipher This patch replaces use of the obsolete blkcipher with skcipher. Signed-off-by: Herbert Xu --- crypto/seqiv.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crypto/seqiv.c b/crypto/seqiv.c index 15a749a5cab72..a859b3ae239d4 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -165,12 +165,16 @@ static int seqiv_aead_encrypt(struct aead_request *req) info = req->iv; if (req->src != req->dst) { - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); - err = crypto_blkcipher_encrypt(&desc, req->dst, req->src, - req->assoclen + req->cryptlen); + skcipher_request_set_tfm(nreq, ctx->sknull); + skcipher_request_set_callback(nreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(nreq, req->src, req->dst, + req->assoclen + req->cryptlen, + NULL); + + err = crypto_skcipher_encrypt(nreq); if (err) return err; } From da721302a78807e5da2902677fbe116fe052068f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:46 +0800 Subject: [PATCH 159/180] crypto: aead - Remove blkcipher null for IV generators The blkcipher null object is no longer used and can now be removed. Signed-off-by: Herbert Xu --- crypto/aead.c | 8 -------- include/crypto/internal/geniv.h | 1 - 2 files changed, 9 deletions(-) diff --git a/crypto/aead.c b/crypto/aead.c index a5d9a83f90e4d..3f5c5ff004abc 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -299,11 +299,6 @@ int aead_init_geniv(struct crypto_aead *aead) if (IS_ERR(ctx->sknull)) goto out; - ctx->null = crypto_get_default_null_skcipher(); - err = PTR_ERR(ctx->null); - if (IS_ERR(ctx->null)) - goto drop_sknull; - child = crypto_spawn_aead(aead_instance_ctx(inst)); err = PTR_ERR(child); if (IS_ERR(child)) @@ -319,8 +314,6 @@ int aead_init_geniv(struct crypto_aead *aead) return err; drop_null: - crypto_put_default_null_skcipher(); -drop_sknull: crypto_put_default_null_skcipher2(); goto out; } @@ -331,7 +324,6 @@ void aead_exit_geniv(struct crypto_aead *tfm) struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); - crypto_put_default_null_skcipher(); crypto_put_default_null_skcipher2(); } EXPORT_SYMBOL_GPL(aead_exit_geniv); diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h index e8447c9c14e6e..2bcfb931bc5bc 100644 --- a/include/crypto/internal/geniv.h +++ b/include/crypto/internal/geniv.h @@ -20,7 +20,6 @@ struct aead_geniv_ctx { spinlock_t lock; struct crypto_aead *child; - struct crypto_blkcipher *null; struct crypto_skcipher *sknull; u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); }; From 499a66e6b689b13d1a4108bec5d7dcdc829a27a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:47 +0800 Subject: [PATCH 160/180] crypto: null - Remove default null blkcipher The default null blkcipher is no longer used and can now be removed. Signed-off-by: Herbert Xu --- crypto/crypto_null.c | 49 ++++++------------------------------------- include/crypto/null.h | 14 ++++++++++--- 2 files changed, 17 insertions(+), 46 deletions(-) diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index c3f683910d072..20ff2c746e0ba 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -26,10 +26,8 @@ #include static DEFINE_MUTEX(crypto_default_null_skcipher_lock); -static struct crypto_blkcipher *crypto_default_null_skcipher; +static struct crypto_skcipher *crypto_default_null_skcipher; static int crypto_default_null_skcipher_refcnt; -static struct crypto_skcipher *crypto_default_null_skcipher2; -static int crypto_default_null_skcipher2_refcnt; static int null_compress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) @@ -155,15 +153,16 @@ MODULE_ALIAS_CRYPTO("compress_null"); MODULE_ALIAS_CRYPTO("digest_null"); MODULE_ALIAS_CRYPTO("cipher_null"); -struct crypto_blkcipher *crypto_get_default_null_skcipher(void) +struct crypto_skcipher *crypto_get_default_null_skcipher(void) { - struct crypto_blkcipher *tfm; + struct crypto_skcipher *tfm; mutex_lock(&crypto_default_null_skcipher_lock); tfm = crypto_default_null_skcipher; if (!tfm) { - tfm = crypto_alloc_blkcipher("ecb(cipher_null)", 0, 0); + tfm = crypto_alloc_skcipher("ecb(cipher_null)", + 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto unlock; @@ -183,49 +182,13 @@ void crypto_put_default_null_skcipher(void) { mutex_lock(&crypto_default_null_skcipher_lock); if (!--crypto_default_null_skcipher_refcnt) { - crypto_free_blkcipher(crypto_default_null_skcipher); + crypto_free_skcipher(crypto_default_null_skcipher); crypto_default_null_skcipher = NULL; } mutex_unlock(&crypto_default_null_skcipher_lock); } EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher); -struct crypto_skcipher *crypto_get_default_null_skcipher2(void) -{ - struct crypto_skcipher *tfm; - - mutex_lock(&crypto_default_null_skcipher_lock); - tfm = crypto_default_null_skcipher2; - - if (!tfm) { - tfm = crypto_alloc_skcipher("ecb(cipher_null)", - 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto unlock; - - crypto_default_null_skcipher2 = tfm; - } - - crypto_default_null_skcipher2_refcnt++; - -unlock: - mutex_unlock(&crypto_default_null_skcipher_lock); - - return tfm; -} -EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher2); - -void crypto_put_default_null_skcipher2(void) -{ - mutex_lock(&crypto_default_null_skcipher_lock); - if (!--crypto_default_null_skcipher2_refcnt) { - crypto_free_skcipher(crypto_default_null_skcipher2); - crypto_default_null_skcipher2 = NULL; - } - mutex_unlock(&crypto_default_null_skcipher_lock); -} -EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher2); - static int __init crypto_null_mod_init(void) { int ret = 0; diff --git a/include/crypto/null.h b/include/crypto/null.h index dda87cbb62b2a..3f0c59fb0a61d 100644 --- a/include/crypto/null.h +++ b/include/crypto/null.h @@ -8,9 +8,17 @@ #define NULL_DIGEST_SIZE 0 #define NULL_IV_SIZE 0 -struct crypto_blkcipher *crypto_get_default_null_skcipher(void); +struct crypto_skcipher *crypto_get_default_null_skcipher(void); void crypto_put_default_null_skcipher(void); -struct crypto_skcipher *crypto_get_default_null_skcipher2(void); -void crypto_put_default_null_skcipher2(void); + +static inline struct crypto_skcipher *crypto_get_default_null_skcipher2(void) +{ + return crypto_get_default_null_skcipher(); +} + +static inline void crypto_put_default_null_skcipher2(void) +{ + crypto_put_default_null_skcipher(); +} #endif From 0605c41cc53ca13775d202de0de33864a46162ba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:48 +0800 Subject: [PATCH 161/180] crypto: cts - Convert to skcipher This patch converts cts over to the skcipher interface. It also optimises the implementation to use one CBC operation for all but the last block, which is then processed separately. Signed-off-by: Herbert Xu --- crypto/cts.c | 495 +++++++++++++++++++++++++++++---------------------- 1 file changed, 285 insertions(+), 210 deletions(-) diff --git a/crypto/cts.c b/crypto/cts.c index e467ec0acf9f0..51976187b2bf8 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -40,7 +40,7 @@ * rfc3962 includes errata information in its Appendix A. */ -#include +#include #include #include #include @@ -51,289 +51,364 @@ #include struct crypto_cts_ctx { - struct crypto_blkcipher *child; + struct crypto_skcipher *child; }; -static int crypto_cts_setkey(struct crypto_tfm *parent, const u8 *key, - unsigned int keylen) +struct crypto_cts_reqctx { + struct scatterlist sg[2]; + unsigned offset; + struct skcipher_request subreq; +}; + +static inline u8 *crypto_cts_reqctx_space(struct skcipher_request *req) { - struct crypto_cts_ctx *ctx = crypto_tfm_ctx(parent); - struct crypto_blkcipher *child = ctx->child; - int err; + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; - crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_blkcipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - return err; + return PTR_ALIGN((u8 *)(rctx + 1) + crypto_skcipher_reqsize(child), + crypto_skcipher_alignmask(tfm) + 1); } -static int cts_cbc_encrypt(struct crypto_cts_ctx *ctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int offset, - unsigned int nbytes) +static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key, + unsigned int keylen) { - int bsize = crypto_blkcipher_blocksize(desc->tfm); - u8 tmp[bsize], tmp2[bsize]; - struct blkcipher_desc lcldesc; - struct scatterlist sgsrc[1], sgdst[1]; - int lastn = nbytes - bsize; - u8 iv[bsize]; - u8 s[bsize * 2], d[bsize * 2]; + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err; - if (lastn < 0) - return -EINVAL; + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} - sg_init_table(sgsrc, 1); - sg_init_table(sgdst, 1); +static void cts_cbc_crypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; - memset(s, 0, sizeof(s)); - scatterwalk_map_and_copy(s, src, offset, nbytes, 0); + if (err == -EINPROGRESS) + return; - memcpy(iv, desc->info, bsize); + skcipher_request_complete(req, err); +} - lcldesc.tfm = ctx->child; - lcldesc.info = iv; - lcldesc.flags = desc->flags; +static int cts_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + struct scatterlist *sg; + unsigned int offset; + int lastn; + + offset = rctx->offset; + lastn = req->cryptlen - offset; + + sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); + scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); + + memset(d, 0, bsize); + scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); + + scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); + memzero_explicit(d, sizeof(d)); + + skcipher_request_set_callback(subreq, req->base.flags & + CRYPTO_TFM_REQ_MAY_BACKLOG, + cts_cbc_crypt_done, req); + skcipher_request_set_crypt(subreq, sg, sg, bsize, req->iv); + return crypto_skcipher_encrypt(subreq); +} - sg_set_buf(&sgsrc[0], s, bsize); - sg_set_buf(&sgdst[0], tmp, bsize); - err = crypto_blkcipher_encrypt_iv(&lcldesc, sgdst, sgsrc, bsize); +static void crypto_cts_encrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; - memcpy(d + bsize, tmp, lastn); + if (err) + goto out; - lcldesc.info = tmp; + err = cts_cbc_encrypt(req); + if (err == -EINPROGRESS || + (err == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return; - sg_set_buf(&sgsrc[0], s + bsize, bsize); - sg_set_buf(&sgdst[0], tmp2, bsize); - err = crypto_blkcipher_encrypt_iv(&lcldesc, sgdst, sgsrc, bsize); +out: + skcipher_request_complete(req, err); +} - memcpy(d, tmp2, bsize); +static int crypto_cts_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = req->cryptlen; + int cbc_blocks = (nbytes + bsize - 1) / bsize - 1; + unsigned int offset; + + skcipher_request_set_tfm(subreq, ctx->child); + + if (cbc_blocks <= 0) { + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, + req->iv); + return crypto_skcipher_encrypt(subreq); + } - scatterwalk_map_and_copy(d, dst, offset, nbytes, 1); + offset = cbc_blocks * bsize; + rctx->offset = offset; - memcpy(desc->info, tmp2, bsize); + skcipher_request_set_callback(subreq, req->base.flags, + crypto_cts_encrypt_done, req); + skcipher_request_set_crypt(subreq, req->src, req->dst, + offset, req->iv); - return err; + return crypto_skcipher_encrypt(subreq) ?: + cts_cbc_encrypt(req); } -static int crypto_cts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cts_cbc_decrypt(struct skcipher_request *req) { - struct crypto_cts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - int bsize = crypto_blkcipher_blocksize(desc->tfm); - int tot_blocks = (nbytes + bsize - 1) / bsize; - int cbc_blocks = tot_blocks > 2 ? tot_blocks - 2 : 0; - struct blkcipher_desc lcldesc; - int err; + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + struct scatterlist *sg; + unsigned int offset; + u8 *space; + int lastn; + + offset = rctx->offset; + lastn = req->cryptlen - offset; + + sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); + + /* 1. Decrypt Cn-1 (s) to create Dn */ + scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); + space = crypto_cts_reqctx_space(req); + crypto_xor(d + bsize, space, bsize); + /* 2. Pad Cn with zeros at the end to create C of length BB */ + memset(d, 0, bsize); + scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); + /* 3. Exclusive-or Dn with C to create Xn */ + /* 4. Select the first Ln bytes of Xn to create Pn */ + crypto_xor(d + bsize, d, lastn); + + /* 5. Append the tail (BB - Ln) bytes of Xn to Cn to create En */ + memcpy(d + lastn, d + bsize + lastn, bsize - lastn); + /* 6. Decrypt En to create Pn-1 */ - lcldesc.tfm = ctx->child; - lcldesc.info = desc->info; - lcldesc.flags = desc->flags; - - if (tot_blocks == 1) { - err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, bsize); - } else if (nbytes <= bsize * 2) { - err = cts_cbc_encrypt(ctx, desc, dst, src, 0, nbytes); - } else { - /* do normal function for tot_blocks - 2 */ - err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, - cbc_blocks * bsize); - if (err == 0) { - /* do cts for final two blocks */ - err = cts_cbc_encrypt(ctx, desc, dst, src, - cbc_blocks * bsize, - nbytes - (cbc_blocks * bsize)); - } - } + scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); + memzero_explicit(d, sizeof(d)); - return err; + skcipher_request_set_callback(subreq, req->base.flags & + CRYPTO_TFM_REQ_MAY_BACKLOG, + cts_cbc_crypt_done, req); + + skcipher_request_set_crypt(subreq, sg, sg, bsize, space); + return crypto_skcipher_decrypt(subreq); } -static int cts_cbc_decrypt(struct crypto_cts_ctx *ctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int offset, - unsigned int nbytes) +static void crypto_cts_decrypt_done(struct crypto_async_request *areq, int err) { - int bsize = crypto_blkcipher_blocksize(desc->tfm); - u8 tmp[bsize]; - struct blkcipher_desc lcldesc; - struct scatterlist sgsrc[1], sgdst[1]; - int lastn = nbytes - bsize; - u8 iv[bsize]; - u8 s[bsize * 2], d[bsize * 2]; - int err; - - if (lastn < 0) - return -EINVAL; + struct skcipher_request *req = areq->data; - sg_init_table(sgsrc, 1); - sg_init_table(sgdst, 1); + if (err) + goto out; - scatterwalk_map_and_copy(s, src, offset, nbytes, 0); + err = cts_cbc_decrypt(req); + if (err == -EINPROGRESS || + (err == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return; - lcldesc.tfm = ctx->child; - lcldesc.info = iv; - lcldesc.flags = desc->flags; +out: + skcipher_request_complete(req, err); +} - /* 1. Decrypt Cn-1 (s) to create Dn (tmp)*/ - memset(iv, 0, sizeof(iv)); - sg_set_buf(&sgsrc[0], s, bsize); - sg_set_buf(&sgdst[0], tmp, bsize); - err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize); - if (err) - return err; - /* 2. Pad Cn with zeros at the end to create C of length BB */ - memset(iv, 0, sizeof(iv)); - memcpy(iv, s + bsize, lastn); - /* 3. Exclusive-or Dn (tmp) with C (iv) to create Xn (tmp) */ - crypto_xor(tmp, iv, bsize); - /* 4. Select the first Ln bytes of Xn (tmp) to create Pn */ - memcpy(d + bsize, tmp, lastn); - - /* 5. Append the tail (BB - Ln) bytes of Xn (tmp) to Cn to create En */ - memcpy(s + bsize + lastn, tmp + lastn, bsize - lastn); - /* 6. Decrypt En to create Pn-1 */ - memzero_explicit(iv, sizeof(iv)); +static int crypto_cts_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = req->cryptlen; + int cbc_blocks = (nbytes + bsize - 1) / bsize - 1; + unsigned int offset; + u8 *space; + + skcipher_request_set_tfm(subreq, ctx->child); + + if (cbc_blocks <= 0) { + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, + req->iv); + return crypto_skcipher_decrypt(subreq); + } - sg_set_buf(&sgsrc[0], s + bsize, bsize); - sg_set_buf(&sgdst[0], d, bsize); - err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize); + skcipher_request_set_callback(subreq, req->base.flags, + crypto_cts_decrypt_done, req); - /* XOR with previous block */ - crypto_xor(d, desc->info, bsize); + space = crypto_cts_reqctx_space(req); - scatterwalk_map_and_copy(d, dst, offset, nbytes, 1); + offset = cbc_blocks * bsize; + rctx->offset = offset; - memcpy(desc->info, s, bsize); - return err; -} + if (cbc_blocks <= 1) + memcpy(space, req->iv, bsize); + else + scatterwalk_map_and_copy(space, req->src, offset - 2 * bsize, + bsize, 0); -static int crypto_cts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_cts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - int bsize = crypto_blkcipher_blocksize(desc->tfm); - int tot_blocks = (nbytes + bsize - 1) / bsize; - int cbc_blocks = tot_blocks > 2 ? tot_blocks - 2 : 0; - struct blkcipher_desc lcldesc; - int err; + skcipher_request_set_crypt(subreq, req->src, req->dst, + offset, req->iv); - lcldesc.tfm = ctx->child; - lcldesc.info = desc->info; - lcldesc.flags = desc->flags; - - if (tot_blocks == 1) { - err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, bsize); - } else if (nbytes <= bsize * 2) { - err = cts_cbc_decrypt(ctx, desc, dst, src, 0, nbytes); - } else { - /* do normal function for tot_blocks - 2 */ - err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, - cbc_blocks * bsize); - if (err == 0) { - /* do cts for final two blocks */ - err = cts_cbc_decrypt(ctx, desc, dst, src, - cbc_blocks * bsize, - nbytes - (cbc_blocks * bsize)); - } - } - return err; + return crypto_skcipher_decrypt(subreq) ?: + cts_cbc_decrypt(req); } -static int crypto_cts_init_tfm(struct crypto_tfm *tfm) +static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_cts_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_blkcipher *cipher; - - cipher = crypto_spawn_blkcipher(spawn); + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; + unsigned reqsize; + unsigned bsize; + unsigned align; + + cipher = crypto_spawn_skcipher2(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; + + align = crypto_skcipher_alignmask(tfm); + bsize = crypto_skcipher_blocksize(cipher); + reqsize = ALIGN(sizeof(struct crypto_cts_reqctx) + + crypto_skcipher_reqsize(cipher), + crypto_tfm_ctx_alignment()) + + (align & ~(crypto_tfm_ctx_alignment() - 1)) + bsize; + + crypto_skcipher_set_reqsize(tfm, reqsize); + return 0; } -static void crypto_cts_exit_tfm(struct crypto_tfm *tfm) +static void crypto_cts_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_cts_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(ctx->child); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); } -static struct crypto_instance *crypto_cts_alloc(struct rtattr **tb) +static void crypto_cts_free(struct skcipher_instance *inst) { - struct crypto_instance *inst; - struct crypto_alg *alg; + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_skcipher_spawn *spawn; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct skcipher_alg *alg; + const char *cipher_name; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher2(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) - return ERR_PTR(err); + goto err_free_inst; - alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_BLKCIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); + alg = crypto_spawn_skcipher_alg(spawn); - inst = ERR_PTR(-EINVAL); - if (!is_power_of_2(alg->cra_blocksize)) - goto out_put_alg; + err = -EINVAL; + if (crypto_skcipher_alg_ivsize(alg) != alg->base.cra_blocksize) + goto err_drop_spawn; - if (strncmp(alg->cra_name, "cbc(", 4)) - goto out_put_alg; + if (strncmp(alg->base.cra_name, "cbc(", 4)) + goto err_drop_spawn; - inst = crypto_alloc_instance("cts", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts", + &alg->base); + if (err) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = alg->base.cra_blocksize; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; /* We access the data as u32s when xoring. */ - inst->alg.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; + inst->alg.ivsize = alg->base.cra_blocksize; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); - inst->alg.cra_ctxsize = sizeof(struct crypto_cts_ctx); + inst->alg.base.cra_ctxsize = sizeof(struct crypto_cts_ctx); - inst->alg.cra_init = crypto_cts_init_tfm; - inst->alg.cra_exit = crypto_cts_exit_tfm; + inst->alg.init = crypto_cts_init_tfm; + inst->alg.exit = crypto_cts_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_cts_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_cts_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_cts_decrypt; + inst->alg.setkey = crypto_cts_setkey; + inst->alg.encrypt = crypto_cts_encrypt; + inst->alg.decrypt = crypto_cts_decrypt; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + inst->free = crypto_cts_free; -static void crypto_cts_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_skcipher(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_cts_tmpl = { .name = "cts", - .alloc = crypto_cts_alloc, - .free = crypto_cts_free, + .create = crypto_cts_create, .module = THIS_MODULE, }; From 6cf80a296575723aed6ce6c695581540202bfc6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:49 +0800 Subject: [PATCH 162/180] crypto: user - Remove crypto_lookup_skcipher call As there are no more kernel users of built-in IV generators we can remove the special lookup for skciphers. Signed-off-by: Herbert Xu --- crypto/crypto_user.c | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index d28513fb5a90b..c24a40c8a4bfd 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -378,32 +378,6 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type, - u32 mask) -{ - int err; - struct crypto_alg *alg; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask); - - for (;;) { - alg = crypto_lookup_skcipher(name, type, mask); - if (!IS_ERR(alg)) - return alg; - - err = PTR_ERR(alg); - if (err != -EAGAIN) - break; - if (fatal_signal_pending(current)) { - err = -EINTR; - break; - } - } - - return ERR_PTR(err); -} - static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -436,16 +410,7 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, else name = p->cru_name; - switch (p->cru_type & p->cru_mask & CRYPTO_ALG_TYPE_MASK) { - case CRYPTO_ALG_TYPE_GIVCIPHER: - case CRYPTO_ALG_TYPE_BLKCIPHER: - case CRYPTO_ALG_TYPE_ABLKCIPHER: - alg = crypto_user_skcipher_alg(name, p->cru_type, p->cru_mask); - break; - default: - alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask); - } - + alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask); if (IS_ERR(alg)) return PTR_ERR(alg); From 3a01d0ee2b991c8c267620e63a4ab47cd8c30cc4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:50 +0800 Subject: [PATCH 163/180] crypto: skcipher - Remove top-level givcipher interface This patch removes the old crypto_grab_skcipher helper and replaces it with crypto_grab_skcipher2. As this is the final entry point into givcipher this patch also removes all traces of the top-level givcipher interface, including all implicit IV generators such as chainiv. The bottom-level givcipher interface remains until the drivers using it are converted. Signed-off-by: Herbert Xu --- crypto/Makefile | 2 - crypto/ablkcipher.c | 222 -------------------- crypto/blkcipher.c | 185 ----------------- crypto/chainiv.c | 317 ----------------------------- crypto/eseqiv.c | 242 ---------------------- crypto/seqiv.c | 162 +-------------- crypto/skcipher.c | 4 +- include/crypto/internal/skcipher.h | 63 ++---- include/crypto/skcipher.h | 76 ------- include/linux/crypto.h | 19 -- 10 files changed, 18 insertions(+), 1274 deletions(-) delete mode 100644 crypto/chainiv.c delete mode 100644 crypto/eseqiv.c diff --git a/crypto/Makefile b/crypto/Makefile index df1bcfb090d21..99cc64ac70efc 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -20,8 +20,6 @@ crypto_blkcipher-y := ablkcipher.o crypto_blkcipher-y += blkcipher.o crypto_blkcipher-y += skcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o -obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o -obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 6b80516778c64..d676fc59521a0 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -16,8 +16,6 @@ #include #include #include -#include -#include #include #include #include @@ -348,16 +346,6 @@ static unsigned int crypto_ablkcipher_ctxsize(struct crypto_alg *alg, u32 type, return alg->cra_ctxsize; } -int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req) -{ - return crypto_ablkcipher_encrypt(&req->creq); -} - -int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req) -{ - return crypto_ablkcipher_decrypt(&req->creq); -} - static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { @@ -370,10 +358,6 @@ static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type, crt->setkey = setkey; crt->encrypt = alg->encrypt; crt->decrypt = alg->decrypt; - if (!alg->ivsize) { - crt->givencrypt = skcipher_null_givencrypt; - crt->givdecrypt = skcipher_null_givdecrypt; - } crt->base = __crypto_ablkcipher_cast(tfm); crt->ivsize = alg->ivsize; @@ -435,11 +419,6 @@ const struct crypto_type crypto_ablkcipher_type = { }; EXPORT_SYMBOL_GPL(crypto_ablkcipher_type); -static int no_givdecrypt(struct skcipher_givcrypt_request *req) -{ - return -ENOSYS; -} - static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { @@ -453,8 +432,6 @@ static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type, alg->setkey : setkey; crt->encrypt = alg->encrypt; crt->decrypt = alg->decrypt; - crt->givencrypt = alg->givencrypt ?: no_givdecrypt; - crt->givdecrypt = alg->givdecrypt ?: no_givdecrypt; crt->base = __crypto_ablkcipher_cast(tfm); crt->ivsize = alg->ivsize; @@ -515,202 +492,3 @@ const struct crypto_type crypto_givcipher_type = { .report = crypto_givcipher_report, }; EXPORT_SYMBOL_GPL(crypto_givcipher_type); - -const char *crypto_default_geniv(const struct crypto_alg *alg) -{ - if (((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize) != - alg->cra_blocksize) - return "chainiv"; - - return "eseqiv"; -} - -static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask) -{ - struct rtattr *tb[3]; - struct { - struct rtattr attr; - struct crypto_attr_type data; - } ptype; - struct { - struct rtattr attr; - struct crypto_attr_alg data; - } palg; - struct crypto_template *tmpl; - struct crypto_instance *inst; - struct crypto_alg *larval; - const char *geniv; - int err; - - larval = crypto_larval_lookup(alg->cra_driver_name, - (type & ~CRYPTO_ALG_TYPE_MASK) | - CRYPTO_ALG_TYPE_GIVCIPHER, - mask | CRYPTO_ALG_TYPE_MASK); - err = PTR_ERR(larval); - if (IS_ERR(larval)) - goto out; - - err = -EAGAIN; - if (!crypto_is_larval(larval)) - goto drop_larval; - - ptype.attr.rta_len = sizeof(ptype); - ptype.attr.rta_type = CRYPTOA_TYPE; - ptype.data.type = type | CRYPTO_ALG_GENIV; - /* GENIV tells the template that we're making a default geniv. */ - ptype.data.mask = mask | CRYPTO_ALG_GENIV; - tb[0] = &ptype.attr; - - palg.attr.rta_len = sizeof(palg); - palg.attr.rta_type = CRYPTOA_ALG; - /* Must use the exact name to locate ourselves. */ - memcpy(palg.data.name, alg->cra_driver_name, CRYPTO_MAX_ALG_NAME); - tb[1] = &palg.attr; - - tb[2] = NULL; - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER) - geniv = alg->cra_blkcipher.geniv; - else - geniv = alg->cra_ablkcipher.geniv; - - if (!geniv) - geniv = crypto_default_geniv(alg); - - tmpl = crypto_lookup_template(geniv); - err = -ENOENT; - if (!tmpl) - goto kill_larval; - - if (tmpl->create) { - err = tmpl->create(tmpl, tb); - if (err) - goto put_tmpl; - goto ok; - } - - inst = tmpl->alloc(tb); - err = PTR_ERR(inst); - if (IS_ERR(inst)) - goto put_tmpl; - - err = crypto_register_instance(tmpl, inst); - if (err) { - tmpl->free(inst); - goto put_tmpl; - } - -ok: - /* Redo the lookup to use the instance we just registered. */ - err = -EAGAIN; - -put_tmpl: - crypto_tmpl_put(tmpl); -kill_larval: - crypto_larval_kill(larval); -drop_larval: - crypto_mod_put(larval); -out: - crypto_mod_put(alg); - return err; -} - -struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask) -{ - struct crypto_alg *alg; - - alg = crypto_alg_mod_lookup(name, type, mask); - if (IS_ERR(alg)) - return alg; - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_GIVCIPHER) - return alg; - - if (!((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize)) - return alg; - - crypto_mod_put(alg); - alg = crypto_alg_mod_lookup(name, type | CRYPTO_ALG_TESTED, - mask & ~CRYPTO_ALG_TESTED); - if (IS_ERR(alg)) - return alg; - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_GIVCIPHER) { - if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) { - crypto_mod_put(alg); - alg = ERR_PTR(-ENOENT); - } - return alg; - } - - BUG_ON(!((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize)); - - return ERR_PTR(crypto_givcipher_default(alg, type, mask)); -} -EXPORT_SYMBOL_GPL(crypto_lookup_skcipher); - -int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, - u32 type, u32 mask) -{ - struct crypto_alg *alg; - int err; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask); - - alg = crypto_lookup_skcipher(name, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); - - err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask); - crypto_mod_put(alg); - return err; -} -EXPORT_SYMBOL_GPL(crypto_grab_skcipher); - -struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name, - u32 type, u32 mask) -{ - struct crypto_tfm *tfm; - int err; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask); - - for (;;) { - struct crypto_alg *alg; - - alg = crypto_lookup_skcipher(alg_name, type, mask); - if (IS_ERR(alg)) { - err = PTR_ERR(alg); - goto err; - } - - tfm = __crypto_alloc_tfm(alg, type, mask); - if (!IS_ERR(tfm)) - return __crypto_ablkcipher_cast(tfm); - - crypto_mod_put(alg); - err = PTR_ERR(tfm); - -err: - if (err != -EAGAIN) - break; - if (fatal_signal_pending(current)) { - err = -EINTR; - break; - } - } - - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(crypto_alloc_ablkcipher); diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 8cc1622b2ee00..3699995301084 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -466,10 +465,6 @@ static int crypto_init_blkcipher_ops_async(struct crypto_tfm *tfm) crt->setkey = async_setkey; crt->encrypt = async_encrypt; crt->decrypt = async_decrypt; - if (!alg->ivsize) { - crt->givencrypt = skcipher_null_givencrypt; - crt->givdecrypt = skcipher_null_givdecrypt; - } crt->base = __crypto_ablkcipher_cast(tfm); crt->ivsize = alg->ivsize; @@ -560,185 +555,5 @@ const struct crypto_type crypto_blkcipher_type = { }; EXPORT_SYMBOL_GPL(crypto_blkcipher_type); -static int crypto_grab_nivcipher(struct crypto_skcipher_spawn *spawn, - const char *name, u32 type, u32 mask) -{ - struct crypto_alg *alg; - int err; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask)| CRYPTO_ALG_GENIV; - - alg = crypto_alg_mod_lookup(name, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); - - err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask); - crypto_mod_put(alg); - return err; -} - -struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl, - struct rtattr **tb, u32 type, - u32 mask) -{ - struct { - int (*setkey)(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct ablkcipher_request *req); - int (*decrypt)(struct ablkcipher_request *req); - - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; - - const char *geniv; - } balg; - const char *name; - struct crypto_skcipher_spawn *spawn; - struct crypto_attr_type *algt; - struct crypto_instance *inst; - struct crypto_alg *alg; - int err; - - algt = crypto_get_attr_type(tb); - if (IS_ERR(algt)) - return ERR_CAST(algt); - - if ((algt->type ^ (CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV)) & - algt->mask) - return ERR_PTR(-EINVAL); - - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return ERR_CAST(name); - - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); - if (!inst) - return ERR_PTR(-ENOMEM); - - spawn = crypto_instance_ctx(inst); - - /* Ignore async algorithms if necessary. */ - mask |= crypto_requires_sync(algt->type, algt->mask); - - crypto_set_skcipher_spawn(spawn, inst); - err = crypto_grab_nivcipher(spawn, name, type, mask); - if (err) - goto err_free_inst; - - alg = crypto_skcipher_spawn_alg(spawn); - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER) { - balg.ivsize = alg->cra_blkcipher.ivsize; - balg.min_keysize = alg->cra_blkcipher.min_keysize; - balg.max_keysize = alg->cra_blkcipher.max_keysize; - - balg.setkey = async_setkey; - balg.encrypt = async_encrypt; - balg.decrypt = async_decrypt; - - balg.geniv = alg->cra_blkcipher.geniv; - } else { - balg.ivsize = alg->cra_ablkcipher.ivsize; - balg.min_keysize = alg->cra_ablkcipher.min_keysize; - balg.max_keysize = alg->cra_ablkcipher.max_keysize; - - balg.setkey = alg->cra_ablkcipher.setkey; - balg.encrypt = alg->cra_ablkcipher.encrypt; - balg.decrypt = alg->cra_ablkcipher.decrypt; - - balg.geniv = alg->cra_ablkcipher.geniv; - } - - err = -EINVAL; - if (!balg.ivsize) - goto err_drop_alg; - - /* - * This is only true if we're constructing an algorithm with its - * default IV generator. For the default generator we elide the - * template name and double-check the IV generator. - */ - if (algt->mask & CRYPTO_ALG_GENIV) { - if (!balg.geniv) - balg.geniv = crypto_default_geniv(alg); - err = -EAGAIN; - if (strcmp(tmpl->name, balg.geniv)) - goto err_drop_alg; - - memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); - memcpy(inst->alg.cra_driver_name, alg->cra_driver_name, - CRYPTO_MAX_ALG_NAME); - } else { - err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s)", tmpl->name, alg->cra_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s)", tmpl->name, alg->cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; - } - - inst->alg.cra_flags = CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV; - inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_givcipher_type; - - inst->alg.cra_ablkcipher.ivsize = balg.ivsize; - inst->alg.cra_ablkcipher.min_keysize = balg.min_keysize; - inst->alg.cra_ablkcipher.max_keysize = balg.max_keysize; - inst->alg.cra_ablkcipher.geniv = balg.geniv; - - inst->alg.cra_ablkcipher.setkey = balg.setkey; - inst->alg.cra_ablkcipher.encrypt = balg.encrypt; - inst->alg.cra_ablkcipher.decrypt = balg.decrypt; - -out: - return inst; - -err_drop_alg: - crypto_drop_skcipher(spawn); -err_free_inst: - kfree(inst); - inst = ERR_PTR(err); - goto out; -} -EXPORT_SYMBOL_GPL(skcipher_geniv_alloc); - -void skcipher_geniv_free(struct crypto_instance *inst) -{ - crypto_drop_skcipher(crypto_instance_ctx(inst)); - kfree(inst); -} -EXPORT_SYMBOL_GPL(skcipher_geniv_free); - -int skcipher_geniv_init(struct crypto_tfm *tfm) -{ - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_ablkcipher *cipher; - - cipher = crypto_spawn_skcipher(crypto_instance_ctx(inst)); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); - - tfm->crt_ablkcipher.base = cipher; - tfm->crt_ablkcipher.reqsize += crypto_ablkcipher_reqsize(cipher); - - return 0; -} -EXPORT_SYMBOL_GPL(skcipher_geniv_init); - -void skcipher_geniv_exit(struct crypto_tfm *tfm) -{ - crypto_free_ablkcipher(tfm->crt_ablkcipher.base); -} -EXPORT_SYMBOL_GPL(skcipher_geniv_exit); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic block chaining cipher type"); diff --git a/crypto/chainiv.c b/crypto/chainiv.c deleted file mode 100644 index b4340018c8d47..0000000000000 --- a/crypto/chainiv.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * chainiv: Chain IV Generator - * - * Generate IVs simply be using the last block of the previous encryption. - * This is mainly useful for CBC with a synchronous algorithm. - * - * Copyright (c) 2007 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -enum { - CHAINIV_STATE_INUSE = 0, -}; - -struct chainiv_ctx { - spinlock_t lock; - char iv[]; -}; - -struct async_chainiv_ctx { - unsigned long state; - - spinlock_t lock; - int err; - - struct crypto_queue queue; - struct work_struct postponed; - - char iv[]; -}; - -static int chainiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - unsigned int ivsize; - int err; - - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - ablkcipher_request_set_callback(subreq, req->creq.base.flags & - ~CRYPTO_TFM_REQ_MAY_SLEEP, - req->creq.base.complete, - req->creq.base.data); - ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, - req->creq.nbytes, req->creq.info); - - spin_lock_bh(&ctx->lock); - - ivsize = crypto_ablkcipher_ivsize(geniv); - - memcpy(req->giv, ctx->iv, ivsize); - memcpy(subreq->info, ctx->iv, ivsize); - - err = crypto_ablkcipher_encrypt(subreq); - if (err) - goto unlock; - - memcpy(ctx->iv, subreq->info, ivsize); - -unlock: - spin_unlock_bh(&ctx->lock); - - return err; -} - -static int chainiv_init_common(struct crypto_tfm *tfm, char iv[]) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - int err = 0; - - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request); - - if (iv) { - err = crypto_rng_get_bytes(crypto_default_rng, iv, - crypto_ablkcipher_ivsize(geniv)); - crypto_put_default_rng(); - } - - return err ?: skcipher_geniv_init(tfm); -} - -static int chainiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct chainiv_ctx *ctx = crypto_tfm_ctx(tfm); - char *iv; - - spin_lock_init(&ctx->lock); - - iv = NULL; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = chainiv_givencrypt; - iv = ctx->iv; - } - - return chainiv_init_common(tfm, iv); -} - -static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx) -{ - int queued; - int err = ctx->err; - - if (!ctx->queue.qlen) { - smp_mb__before_atomic(); - clear_bit(CHAINIV_STATE_INUSE, &ctx->state); - - if (!ctx->queue.qlen || - test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state)) - goto out; - } - - queued = queue_work(kcrypto_wq, &ctx->postponed); - BUG_ON(!queued); - -out: - return err; -} - -static int async_chainiv_postpone_request(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - int err; - - spin_lock_bh(&ctx->lock); - err = skcipher_enqueue_givcrypt(&ctx->queue, req); - spin_unlock_bh(&ctx->lock); - - if (test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state)) - return err; - - ctx->err = err; - return async_chainiv_schedule_work(ctx); -} - -static int async_chainiv_givencrypt_tail(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - unsigned int ivsize = crypto_ablkcipher_ivsize(geniv); - - memcpy(req->giv, ctx->iv, ivsize); - memcpy(subreq->info, ctx->iv, ivsize); - - ctx->err = crypto_ablkcipher_encrypt(subreq); - if (ctx->err) - goto out; - - memcpy(ctx->iv, subreq->info, ivsize); - -out: - return async_chainiv_schedule_work(ctx); -} - -static int async_chainiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - ablkcipher_request_set_callback(subreq, req->creq.base.flags, - req->creq.base.complete, - req->creq.base.data); - ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, - req->creq.nbytes, req->creq.info); - - if (test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state)) - goto postpone; - - if (ctx->queue.qlen) { - clear_bit(CHAINIV_STATE_INUSE, &ctx->state); - goto postpone; - } - - return async_chainiv_givencrypt_tail(req); - -postpone: - return async_chainiv_postpone_request(req); -} - -static void async_chainiv_do_postponed(struct work_struct *work) -{ - struct async_chainiv_ctx *ctx = container_of(work, - struct async_chainiv_ctx, - postponed); - struct skcipher_givcrypt_request *req; - struct ablkcipher_request *subreq; - int err; - - /* Only handle one request at a time to avoid hogging keventd. */ - spin_lock_bh(&ctx->lock); - req = skcipher_dequeue_givcrypt(&ctx->queue); - spin_unlock_bh(&ctx->lock); - - if (!req) { - async_chainiv_schedule_work(ctx); - return; - } - - subreq = skcipher_givcrypt_reqctx(req); - subreq->base.flags |= CRYPTO_TFM_REQ_MAY_SLEEP; - - err = async_chainiv_givencrypt_tail(req); - - local_bh_disable(); - skcipher_givcrypt_complete(req, err); - local_bh_enable(); -} - -static int async_chainiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct async_chainiv_ctx *ctx = crypto_tfm_ctx(tfm); - char *iv; - - spin_lock_init(&ctx->lock); - - crypto_init_queue(&ctx->queue, 100); - INIT_WORK(&ctx->postponed, async_chainiv_do_postponed); - - iv = NULL; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = - async_chainiv_givencrypt; - iv = ctx->iv; - } - - return chainiv_init_common(tfm, iv); -} - -static void async_chainiv_exit(struct crypto_tfm *tfm) -{ - struct async_chainiv_ctx *ctx = crypto_tfm_ctx(tfm); - - BUG_ON(test_bit(CHAINIV_STATE_INUSE, &ctx->state) || ctx->queue.qlen); - - skcipher_geniv_exit(tfm); -} - -static struct crypto_template chainiv_tmpl; - -static struct crypto_instance *chainiv_alloc(struct rtattr **tb) -{ - struct crypto_attr_type *algt; - struct crypto_instance *inst; - - algt = crypto_get_attr_type(tb); - if (IS_ERR(algt)) - return ERR_CAST(algt); - - inst = skcipher_geniv_alloc(&chainiv_tmpl, tb, 0, 0); - if (IS_ERR(inst)) - goto out; - - inst->alg.cra_init = chainiv_init; - inst->alg.cra_exit = skcipher_geniv_exit; - - inst->alg.cra_ctxsize = sizeof(struct chainiv_ctx); - - if (!crypto_requires_sync(algt->type, algt->mask)) { - inst->alg.cra_flags |= CRYPTO_ALG_ASYNC; - - inst->alg.cra_init = async_chainiv_init; - inst->alg.cra_exit = async_chainiv_exit; - - inst->alg.cra_ctxsize = sizeof(struct async_chainiv_ctx); - } - - inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; - -out: - return inst; -} - -static struct crypto_template chainiv_tmpl = { - .name = "chainiv", - .alloc = chainiv_alloc, - .free = skcipher_geniv_free, - .module = THIS_MODULE, -}; - -static int __init chainiv_module_init(void) -{ - return crypto_register_template(&chainiv_tmpl); -} - -static void chainiv_module_exit(void) -{ - crypto_unregister_template(&chainiv_tmpl); -} - -module_init(chainiv_module_init); -module_exit(chainiv_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Chain IV Generator"); -MODULE_ALIAS_CRYPTO("chainiv"); diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c deleted file mode 100644 index 16dda72fc4f88..0000000000000 --- a/crypto/eseqiv.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * eseqiv: Encrypted Sequence Number IV Generator - * - * This generator generates an IV based on a sequence number by xoring it - * with a salt and then encrypting it with the same key as used to encrypt - * the plain text. This algorithm requires that the block size be equal - * to the IV size. It is mainly useful for CBC. - * - * Copyright (c) 2007 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct eseqiv_request_ctx { - struct scatterlist src[2]; - struct scatterlist dst[2]; - char tail[]; -}; - -struct eseqiv_ctx { - spinlock_t lock; - unsigned int reqoff; - char salt[]; -}; - -static void eseqiv_complete2(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct eseqiv_request_ctx *reqctx = skcipher_givcrypt_reqctx(req); - - memcpy(req->giv, PTR_ALIGN((u8 *)reqctx->tail, - crypto_ablkcipher_alignmask(geniv) + 1), - crypto_ablkcipher_ivsize(geniv)); -} - -static void eseqiv_complete(struct crypto_async_request *base, int err) -{ - struct skcipher_givcrypt_request *req = base->data; - - if (err) - goto out; - - eseqiv_complete2(req); - -out: - skcipher_givcrypt_complete(req, err); -} - -static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct eseqiv_request_ctx *reqctx = skcipher_givcrypt_reqctx(req); - struct ablkcipher_request *subreq; - crypto_completion_t compl; - void *data; - struct scatterlist *osrc, *odst; - struct scatterlist *dst; - struct page *srcp; - struct page *dstp; - u8 *giv; - u8 *vsrc; - u8 *vdst; - __be64 seq; - unsigned int ivsize; - unsigned int len; - int err; - - subreq = (void *)(reqctx->tail + ctx->reqoff); - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - - giv = req->giv; - compl = req->creq.base.complete; - data = req->creq.base.data; - - osrc = req->creq.src; - odst = req->creq.dst; - srcp = sg_page(osrc); - dstp = sg_page(odst); - vsrc = PageHighMem(srcp) ? NULL : page_address(srcp) + osrc->offset; - vdst = PageHighMem(dstp) ? NULL : page_address(dstp) + odst->offset; - - ivsize = crypto_ablkcipher_ivsize(geniv); - - if (vsrc != giv + ivsize && vdst != giv + ivsize) { - giv = PTR_ALIGN((u8 *)reqctx->tail, - crypto_ablkcipher_alignmask(geniv) + 1); - compl = eseqiv_complete; - data = req; - } - - ablkcipher_request_set_callback(subreq, req->creq.base.flags, compl, - data); - - sg_init_table(reqctx->src, 2); - sg_set_buf(reqctx->src, giv, ivsize); - scatterwalk_crypto_chain(reqctx->src, osrc, vsrc == giv + ivsize, 2); - - dst = reqctx->src; - if (osrc != odst) { - sg_init_table(reqctx->dst, 2); - sg_set_buf(reqctx->dst, giv, ivsize); - scatterwalk_crypto_chain(reqctx->dst, odst, vdst == giv + ivsize, 2); - - dst = reqctx->dst; - } - - ablkcipher_request_set_crypt(subreq, reqctx->src, dst, - req->creq.nbytes + ivsize, - req->creq.info); - - memcpy(req->creq.info, ctx->salt, ivsize); - - len = ivsize; - if (ivsize > sizeof(u64)) { - memset(req->giv, 0, ivsize - sizeof(u64)); - len = sizeof(u64); - } - seq = cpu_to_be64(req->seq); - memcpy(req->giv + ivsize - len, &seq, len); - - err = crypto_ablkcipher_encrypt(subreq); - if (err) - goto out; - - if (giv != req->giv) - eseqiv_complete2(req); - -out: - return err; -} - -static int eseqiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - unsigned long alignmask; - unsigned int reqsize; - int err; - - spin_lock_init(&ctx->lock); - - alignmask = crypto_tfm_ctx_alignment() - 1; - reqsize = sizeof(struct eseqiv_request_ctx); - - if (alignmask & reqsize) { - alignmask &= reqsize; - alignmask--; - } - - alignmask = ~alignmask; - alignmask &= crypto_ablkcipher_alignmask(geniv); - - reqsize += alignmask; - reqsize += crypto_ablkcipher_ivsize(geniv); - reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment()); - - ctx->reqoff = reqsize - sizeof(struct eseqiv_request_ctx); - - tfm->crt_ablkcipher.reqsize = reqsize + - sizeof(struct ablkcipher_request); - - err = 0; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = eseqiv_givencrypt; - err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, - crypto_ablkcipher_ivsize(geniv)); - crypto_put_default_rng(); - } - - return err ?: skcipher_geniv_init(tfm); -} - -static struct crypto_template eseqiv_tmpl; - -static struct crypto_instance *eseqiv_alloc(struct rtattr **tb) -{ - struct crypto_instance *inst; - int err; - - inst = skcipher_geniv_alloc(&eseqiv_tmpl, tb, 0, 0); - if (IS_ERR(inst)) - goto out; - - err = -EINVAL; - if (inst->alg.cra_ablkcipher.ivsize != inst->alg.cra_blocksize) - goto free_inst; - - inst->alg.cra_init = eseqiv_init; - inst->alg.cra_exit = skcipher_geniv_exit; - - inst->alg.cra_ctxsize = sizeof(struct eseqiv_ctx); - inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; - -out: - return inst; - -free_inst: - skcipher_geniv_free(inst); - inst = ERR_PTR(err); - goto out; -} - -static struct crypto_template eseqiv_tmpl = { - .name = "eseqiv", - .alloc = eseqiv_alloc, - .free = skcipher_geniv_free, - .module = THIS_MODULE, -}; - -static int __init eseqiv_module_init(void) -{ - return crypto_register_template(&eseqiv_tmpl); -} - -static void __exit eseqiv_module_exit(void) -{ - crypto_unregister_template(&eseqiv_tmpl); -} - -module_init(eseqiv_module_init); -module_exit(eseqiv_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Encrypted Sequence Number IV Generator"); -MODULE_ALIAS_CRYPTO("eseqiv"); diff --git a/crypto/seqiv.c b/crypto/seqiv.c index a859b3ae239d4..c7049231861f0 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -14,50 +14,17 @@ */ #include -#include -#include #include +#include #include #include #include #include #include -#include #include -struct seqiv_ctx { - spinlock_t lock; - u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); -}; - static void seqiv_free(struct crypto_instance *inst); -static void seqiv_complete2(struct skcipher_givcrypt_request *req, int err) -{ - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - struct crypto_ablkcipher *geniv; - - if (err == -EINPROGRESS) - return; - - if (err) - goto out; - - geniv = skcipher_givcrypt_reqtfm(req); - memcpy(req->creq.info, subreq->info, crypto_ablkcipher_ivsize(geniv)); - -out: - kfree(subreq->info); -} - -static void seqiv_complete(struct crypto_async_request *base, int err) -{ - struct skcipher_givcrypt_request *req = base->data; - - seqiv_complete2(req, err); - skcipher_givcrypt_complete(req, err); -} - static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err) { struct aead_request *subreq = aead_request_ctx(req); @@ -85,65 +52,6 @@ static void seqiv_aead_encrypt_complete(struct crypto_async_request *base, aead_request_complete(req, err); } -static void seqiv_geniv(struct seqiv_ctx *ctx, u8 *info, u64 seq, - unsigned int ivsize) -{ - unsigned int len = ivsize; - - if (ivsize > sizeof(u64)) { - memset(info, 0, ivsize - sizeof(u64)); - len = sizeof(u64); - } - seq = cpu_to_be64(seq); - memcpy(info + ivsize - len, &seq, len); - crypto_xor(info, ctx->salt, ivsize); -} - -static int seqiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - crypto_completion_t compl; - void *data; - u8 *info; - unsigned int ivsize; - int err; - - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - - compl = req->creq.base.complete; - data = req->creq.base.data; - info = req->creq.info; - - ivsize = crypto_ablkcipher_ivsize(geniv); - - if (unlikely(!IS_ALIGNED((unsigned long)info, - crypto_ablkcipher_alignmask(geniv) + 1))) { - info = kmalloc(ivsize, req->creq.base.flags & - CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: - GFP_ATOMIC); - if (!info) - return -ENOMEM; - - compl = seqiv_complete; - data = req; - } - - ablkcipher_request_set_callback(subreq, req->creq.base.flags, compl, - data); - ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, - req->creq.nbytes, info); - - seqiv_geniv(ctx, info, req->seq, ivsize); - memcpy(req->giv, info, ivsize); - - err = crypto_ablkcipher_encrypt(subreq); - if (unlikely(info != req->creq.info)) - seqiv_complete2(req, err); - return err; -} - static int seqiv_aead_encrypt(struct aead_request *req) { struct crypto_aead *geniv = crypto_aead_reqtfm(req); @@ -233,62 +141,6 @@ static int seqiv_aead_decrypt(struct aead_request *req) return crypto_aead_decrypt(subreq); } -static int seqiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - int err; - - spin_lock_init(&ctx->lock); - - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request); - - err = 0; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = seqiv_givencrypt; - err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, - crypto_ablkcipher_ivsize(geniv)); - crypto_put_default_rng(); - } - - return err ?: skcipher_geniv_init(tfm); -} - -static int seqiv_ablkcipher_create(struct crypto_template *tmpl, - struct rtattr **tb) -{ - struct crypto_instance *inst; - int err; - - inst = skcipher_geniv_alloc(tmpl, tb, 0, 0); - - if (IS_ERR(inst)) - return PTR_ERR(inst); - - err = -EINVAL; - if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64)) - goto free_inst; - - inst->alg.cra_init = seqiv_init; - inst->alg.cra_exit = skcipher_geniv_exit; - - inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; - inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx); - - inst->alg.cra_alignmask |= __alignof__(u32) - 1; - - err = crypto_register_instance(tmpl, inst); - if (err) - goto free_inst; - -out: - return err; - -free_inst: - skcipher_geniv_free(inst); - goto out; -} - static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb) { struct aead_instance *inst; @@ -334,26 +186,20 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb) static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_attr_type *algt; - int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) return PTR_ERR(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK) - err = seqiv_ablkcipher_create(tmpl, tb); - else - err = seqiv_aead_create(tmpl, tb); + return -EINVAL; - return err; + return seqiv_aead_create(tmpl, tb); } static void seqiv_free(struct crypto_instance *inst) { - if ((inst->alg.cra_flags ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK) - skcipher_geniv_free(inst); - else - aead_geniv_free(aead_instance(inst)); + aead_geniv_free(aead_instance(inst)); } static struct crypto_template seqiv_tmpl = { diff --git a/crypto/skcipher.c b/crypto/skcipher.c index d248008e7f7bc..f7d0018dcaee6 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -325,13 +325,13 @@ static const struct crypto_type crypto_skcipher_type2 = { .tfmsize = offsetof(struct crypto_skcipher, base), }; -int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, +int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_skcipher_type2; return crypto_grab_spawn(&spawn->base, name, type, mask); } -EXPORT_SYMBOL_GPL(crypto_grab_skcipher2); +EXPORT_SYMBOL_GPL(crypto_grab_skcipher); struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask) diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index ce6619c339fee..a21a95e1a375a 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -67,8 +67,12 @@ static inline void crypto_set_skcipher_spawn( int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, u32 type, u32 mask); -int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, - const char *name, u32 type, u32 mask); + +static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, + const char *name, u32 type, u32 mask) +{ + return crypto_grab_skcipher(spawn, name, type, mask); +} struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask); @@ -77,30 +81,28 @@ static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn) crypto_drop_spawn(&spawn->base); } -static inline struct crypto_alg *crypto_skcipher_spawn_alg( +static inline struct skcipher_alg *crypto_skcipher_spawn_alg( struct crypto_skcipher_spawn *spawn) { - return spawn->base.alg; + return container_of(spawn->base.alg, struct skcipher_alg, base); } static inline struct skcipher_alg *crypto_spawn_skcipher_alg( struct crypto_skcipher_spawn *spawn) { - return container_of(spawn->base.alg, struct skcipher_alg, base); + return crypto_skcipher_spawn_alg(spawn); } -static inline struct crypto_ablkcipher *crypto_spawn_skcipher( +static inline struct crypto_skcipher *crypto_spawn_skcipher( struct crypto_skcipher_spawn *spawn) { - return __crypto_ablkcipher_cast( - crypto_spawn_tfm(&spawn->base, crypto_skcipher_type(0), - crypto_skcipher_mask(0))); + return crypto_spawn_tfm2(&spawn->base); } static inline struct crypto_skcipher *crypto_spawn_skcipher2( struct crypto_skcipher_spawn *spawn) { - return crypto_spawn_tfm2(&spawn->base); + return crypto_spawn_skcipher(spawn); } static inline void crypto_skcipher_set_reqsize( @@ -116,53 +118,12 @@ void crypto_unregister_skciphers(struct skcipher_alg *algs, int count); int skcipher_register_instance(struct crypto_template *tmpl, struct skcipher_instance *inst); -int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req); -int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req); -const char *crypto_default_geniv(const struct crypto_alg *alg); - -struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl, - struct rtattr **tb, u32 type, - u32 mask); -void skcipher_geniv_free(struct crypto_instance *inst); -int skcipher_geniv_init(struct crypto_tfm *tfm); -void skcipher_geniv_exit(struct crypto_tfm *tfm); - -static inline struct crypto_ablkcipher *skcipher_geniv_cipher( - struct crypto_ablkcipher *geniv) -{ - return crypto_ablkcipher_crt(geniv)->base; -} - -static inline int skcipher_enqueue_givcrypt( - struct crypto_queue *queue, struct skcipher_givcrypt_request *request) -{ - return ablkcipher_enqueue_request(queue, &request->creq); -} - -static inline struct skcipher_givcrypt_request *skcipher_dequeue_givcrypt( - struct crypto_queue *queue) -{ - return skcipher_givcrypt_cast(crypto_dequeue_request(queue)); -} - -static inline void *skcipher_givcrypt_reqctx( - struct skcipher_givcrypt_request *req) -{ - return ablkcipher_request_ctx(&req->creq); -} - static inline void ablkcipher_request_complete(struct ablkcipher_request *req, int err) { req->base.complete(&req->base, err); } -static inline void skcipher_givcrypt_complete( - struct skcipher_givcrypt_request *req, int err) -{ - ablkcipher_request_complete(&req->creq, err); -} - static inline u32 ablkcipher_request_flags(struct ablkcipher_request *req) { return req->base.flags; diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index a381f57ea6952..59c8f6c593e6b 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -139,82 +139,6 @@ struct skcipher_alg { crypto_skcipher_reqsize(tfm)] CRYPTO_MINALIGN_ATTR; \ struct skcipher_request *name = (void *)__##name##_desc -static inline struct crypto_ablkcipher *skcipher_givcrypt_reqtfm( - struct skcipher_givcrypt_request *req) -{ - return crypto_ablkcipher_reqtfm(&req->creq); -} - -static inline int crypto_skcipher_givencrypt( - struct skcipher_givcrypt_request *req) -{ - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(skcipher_givcrypt_reqtfm(req)); - return crt->givencrypt(req); -}; - -static inline int crypto_skcipher_givdecrypt( - struct skcipher_givcrypt_request *req) -{ - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(skcipher_givcrypt_reqtfm(req)); - return crt->givdecrypt(req); -}; - -static inline void skcipher_givcrypt_set_tfm( - struct skcipher_givcrypt_request *req, struct crypto_ablkcipher *tfm) -{ - req->creq.base.tfm = crypto_ablkcipher_tfm(tfm); -} - -static inline struct skcipher_givcrypt_request *skcipher_givcrypt_cast( - struct crypto_async_request *req) -{ - return container_of(ablkcipher_request_cast(req), - struct skcipher_givcrypt_request, creq); -} - -static inline struct skcipher_givcrypt_request *skcipher_givcrypt_alloc( - struct crypto_ablkcipher *tfm, gfp_t gfp) -{ - struct skcipher_givcrypt_request *req; - - req = kmalloc(sizeof(struct skcipher_givcrypt_request) + - crypto_ablkcipher_reqsize(tfm), gfp); - - if (likely(req)) - skcipher_givcrypt_set_tfm(req, tfm); - - return req; -} - -static inline void skcipher_givcrypt_free(struct skcipher_givcrypt_request *req) -{ - kfree(req); -} - -static inline void skcipher_givcrypt_set_callback( - struct skcipher_givcrypt_request *req, u32 flags, - crypto_completion_t compl, void *data) -{ - ablkcipher_request_set_callback(&req->creq, flags, compl, data); -} - -static inline void skcipher_givcrypt_set_crypt( - struct skcipher_givcrypt_request *req, - struct scatterlist *src, struct scatterlist *dst, - unsigned int nbytes, void *iv) -{ - ablkcipher_request_set_crypt(&req->creq, src, dst, nbytes, iv); -} - -static inline void skcipher_givcrypt_set_giv( - struct skcipher_givcrypt_request *req, u8 *giv, u64 seq) -{ - req->giv = giv; - req->seq = seq; -} - /** * DOC: Symmetric Key Cipher API * diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 37a652d1639d3..7cee5551625b6 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -488,8 +488,6 @@ struct ablkcipher_tfm { unsigned int keylen); int (*encrypt)(struct ablkcipher_request *req); int (*decrypt)(struct ablkcipher_request *req); - int (*givencrypt)(struct skcipher_givcrypt_request *req); - int (*givdecrypt)(struct skcipher_givcrypt_request *req); struct crypto_ablkcipher *base; @@ -714,23 +712,6 @@ static inline u32 crypto_skcipher_mask(u32 mask) * state information is unused by the kernel crypto API. */ -/** - * crypto_alloc_ablkcipher() - allocate asynchronous block cipher handle - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * ablkcipher cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Allocate a cipher handle for an ablkcipher. The returned struct - * crypto_ablkcipher is the cipher handle that is required for any subsequent - * API invocation for that ablkcipher. - * - * Return: allocated cipher handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name, - u32 type, u32 mask); - static inline struct crypto_tfm *crypto_ablkcipher_tfm( struct crypto_ablkcipher *tfm) { From 7c001a8650e67cb293120044c3f62756258ed27c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:52 +0800 Subject: [PATCH 164/180] crypto: omap - Stop using crypto scatterwalk_bytes_sglen We already have a generic function sg_nents_for_len which does the same thing. This patch switches omap over to it and also adds error handling in case the SG list is short. Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 16 ++++++++++------ drivers/crypto/omap-des.c | 14 ++++++++++---- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 8178632de788f..4ab53a6043126 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -578,10 +578,12 @@ static int omap_aes_copy_sgs(struct omap_aes_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -602,7 +604,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, crypto_ablkcipher_reqtfm(req)); struct omap_aes_dev *dd = omap_aes_find_dev(ctx); struct omap_aes_reqctx *rctx; - int len; if (!dd) return -ENODEV; @@ -614,6 +615,14 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_aes_check_aligned(dd->in_sg, dd->total) || omap_aes_check_aligned(dd->out_sg, dd->total)) { if (omap_aes_copy_sgs(dd)) @@ -623,11 +632,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - len = ALIGN(dd->total, AES_BLOCK_SIZE); - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, len); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, len); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 3eedb03111ba9..5691434ffb2d3 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -560,10 +560,12 @@ static int omap_des_copy_sgs(struct omap_des_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, dd->total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, dd->total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -595,6 +597,14 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_des_copy_needed(dd->in_sg) || omap_des_copy_needed(dd->out_sg)) { if (omap_des_copy_sgs(dd)) @@ -604,10 +614,6 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; From 5506f53c7cc17c4ad5e69e5512a35faf77182986 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:53 +0800 Subject: [PATCH 165/180] crypto: scatterwalk - Remove scatterwalk_bytes_sglen This patch removes the now unused scatterwalk_bytes_sglen. Anyone using this out-of-tree should switch over to sg_nents_for_len. Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 22 ---------------------- include/crypto/scatterwalk.h | 2 -- 2 files changed, 24 deletions(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index ea5815c5e1281..03ca4aef1b9d4 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -125,28 +125,6 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, } EXPORT_SYMBOL_GPL(scatterwalk_map_and_copy); -int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes) -{ - int offset = 0, n = 0; - - /* num_bytes is too small */ - if (num_bytes < sg->length) - return -1; - - do { - offset += sg->length; - n++; - sg = sg_next(sg); - - /* num_bytes is too large */ - if (unlikely(!sg && (num_bytes < offset))) - return -1; - } while (sg && (num_bytes > offset)); - - return n; -} -EXPORT_SYMBOL_GPL(scatterwalk_bytes_sglen); - struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], struct scatterlist *src, unsigned int len) diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 35f99b68d0370..7e1a33645eb5f 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -92,8 +92,6 @@ void scatterwalk_done(struct scatter_walk *walk, int out, int more); void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, unsigned int start, unsigned int nbytes, int out); -int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes); - struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], struct scatterlist *src, unsigned int len); From 85eccddee401ae81067e763516889780b5545160 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:55 +0800 Subject: [PATCH 166/180] crypto: scatterwalk - Add no-copy support to copychunks The function ablkcipher_done_slow is pretty much identical to scatterwalk_copychunks except that it doesn't actually copy as the processing hasn't been completed yet. This patch allows scatterwalk_copychunks to be used in this case by specifying out == 2. Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 03ca4aef1b9d4..e124ce26feed4 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -87,9 +87,11 @@ void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, if (len_this_page > nbytes) len_this_page = nbytes; - vaddr = scatterwalk_map(walk); - memcpy_dir(buf, vaddr, len_this_page, out); - scatterwalk_unmap(vaddr); + if (out != 2) { + vaddr = scatterwalk_map(walk); + memcpy_dir(buf, vaddr, len_this_page, out); + scatterwalk_unmap(vaddr); + } scatterwalk_advance(walk, len_this_page); @@ -99,7 +101,7 @@ void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, buf += len_this_page; nbytes -= len_this_page; - scatterwalk_pagedone(walk, out, 1); + scatterwalk_pagedone(walk, out & 1, 1); } } EXPORT_SYMBOL_GPL(scatterwalk_copychunks); From 4140139734459db4dffc50dad7b9e21ed79a8dd9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:56 +0800 Subject: [PATCH 167/180] crypto: api - Optimise away crypto_yield when hard preemption is on When hard preemption is enabled there is no need to explicitly call crypto_yield. This patch eliminates it if that is the case. Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 0483f652ac279..8637cdfe382a4 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -442,8 +442,10 @@ static inline int crypto_memneq(const void *a, const void *b, size_t size) static inline void crypto_yield(u32 flags) { +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY) if (flags & CRYPTO_TFM_REQ_MAY_SLEEP) cond_resched(); +#endif } #endif /* _CRYPTO_ALGAPI_H */ From 5f070e81bee35f1b7bd1477bb223a873ff657803 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:57 +0800 Subject: [PATCH 168/180] crypto: scatterwalk - Fix test in scatterwalk_done When there is more data to be processed, the current test in scatterwalk_done may prevent us from calling pagedone even when we should. In particular, if we're on an SG entry spanning multiple pages where the last page is not a full page, we will incorrectly skip calling pagedone on the second last page. This patch fixes this by adding a separate test for whether we've reached the end of a page. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index e124ce26feed4..52aae29747942 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, void scatterwalk_done(struct scatter_walk *walk, int out, int more) { - if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) + if (!more || walk->offset >= walk->sg->offset + walk->sg->length || + !(walk->offset & (PAGE_SIZE - 1))) scatterwalk_pagedone(walk, out, more); } EXPORT_SYMBOL_GPL(scatterwalk_done); From 28cf86fafdd663cfcad3c5a5fe9869f1fa01b472 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:58 +0800 Subject: [PATCH 169/180] crypto: scatterwalk - Remove unnecessary advance in scatterwalk_pagedone The offset advance in scatterwalk_pagedone not only is unnecessary, but it was also buggy when it was needed by scatterwalk_copychunks. As the latter has long ago been fixed to call scatterwalk_advance directly, we can remove this unnecessary offset adjustment. Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 52aae29747942..2ec5368ed6496 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -62,12 +62,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, flush_dcache_page(page); } - if (more) { - walk->offset += PAGE_SIZE - 1; - walk->offset &= PAGE_MASK; - if (walk->offset >= walk->sg->offset + walk->sg->length) - scatterwalk_start(walk, sg_next(walk->sg)); - } + if (more && walk->offset >= walk->sg->offset + walk->sg->length) + scatterwalk_start(walk, sg_next(walk->sg)); } void scatterwalk_done(struct scatter_walk *walk, int out, int more) From 2ee732d57496b8365819dfb958bc1ff04fcd4cac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:59 +0800 Subject: [PATCH 170/180] crypto: scatterwalk - Remove unnecessary BUG in scatterwalk_start Nothing bad will happen even if sg->length is zero, so there is no point in keeping this BUG_ON in scatterwalk_start. Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 2ec5368ed6496..ddffbb3ee7120 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -33,9 +33,6 @@ static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg) { walk->sg = sg; - - BUG_ON(!sg->length); - walk->offset = sg->offset; } EXPORT_SYMBOL_GPL(scatterwalk_start); From ac02725812cb3a814cfe1fdc2a8a59db073e7e66 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:18:00 +0800 Subject: [PATCH 171/180] crypto: scatterwalk - Inline start/map/done This patch inlines the functions scatterwalk_start, scatterwalk_map and scatterwalk_done as they're all tiny and mostly used by the block cipher walker. Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 43 --------------------------------- include/crypto/scatterwalk.h | 46 +++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 49 deletions(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index ddffbb3ee7120..52ce17a3dd630 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) @@ -30,47 +28,6 @@ static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) memcpy(dst, src, nbytes); } -void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg) -{ - walk->sg = sg; - walk->offset = sg->offset; -} -EXPORT_SYMBOL_GPL(scatterwalk_start); - -void *scatterwalk_map(struct scatter_walk *walk) -{ - return kmap_atomic(scatterwalk_page(walk)) + - offset_in_page(walk->offset); -} -EXPORT_SYMBOL_GPL(scatterwalk_map); - -static void scatterwalk_pagedone(struct scatter_walk *walk, int out, - unsigned int more) -{ - if (out) { - struct page *page; - - page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT); - /* Test ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE first as - * PageSlab cannot be optimised away per se due to - * use of volatile pointer. - */ - if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE && !PageSlab(page)) - flush_dcache_page(page); - } - - if (more && walk->offset >= walk->sg->offset + walk->sg->length) - scatterwalk_start(walk, sg_next(walk->sg)); -} - -void scatterwalk_done(struct scatter_walk *walk, int out, int more) -{ - if (!more || walk->offset >= walk->sg->offset + walk->sg->length || - !(walk->offset & (PAGE_SIZE - 1))) - scatterwalk_pagedone(walk, out, more); -} -EXPORT_SYMBOL_GPL(scatterwalk_done); - void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out) { diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 7e1a33645eb5f..880e6be9e95e1 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -16,14 +16,10 @@ #ifndef _CRYPTO_SCATTERWALK_H #define _CRYPTO_SCATTERWALK_H -#include #include -#include #include #include -#include #include -#include static inline void scatterwalk_crypto_chain(struct scatterlist *head, struct scatterlist *sg, @@ -83,11 +79,49 @@ static inline void scatterwalk_unmap(void *vaddr) kunmap_atomic(vaddr); } -void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg); +static inline void scatterwalk_start(struct scatter_walk *walk, + struct scatterlist *sg) +{ + walk->sg = sg; + walk->offset = sg->offset; +} + +static inline void *scatterwalk_map(struct scatter_walk *walk) +{ + return kmap_atomic(scatterwalk_page(walk)) + + offset_in_page(walk->offset); +} + +static inline void scatterwalk_pagedone(struct scatter_walk *walk, int out, + unsigned int more) +{ + if (out) { + struct page *page; + + page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT); + /* Test ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE first as + * PageSlab cannot be optimised away per se due to + * use of volatile pointer. + */ + if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE && !PageSlab(page)) + flush_dcache_page(page); + } + + if (more && walk->offset >= walk->sg->offset + walk->sg->length) + scatterwalk_start(walk, sg_next(walk->sg)); +} + +static inline void scatterwalk_done(struct scatter_walk *walk, int out, + int more) +{ + if (!more || walk->offset >= walk->sg->offset + walk->sg->length || + !(walk->offset & (PAGE_SIZE - 1))) + scatterwalk_pagedone(walk, out, more); +} + void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); void *scatterwalk_map(struct scatter_walk *walk); -void scatterwalk_done(struct scatter_walk *walk, int out, int more); void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, unsigned int start, unsigned int nbytes, int out); From a6d7bfd0ff21f258913dd5e626d2bd70ab3942df Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 14 Jul 2016 20:39:18 -0700 Subject: [PATCH 172/180] crypto: rsa-pkcs1pad - fix rsa-pkcs1pad request struct To allow for child request context the struct akcipher_request child_req needs to be at the end of the structure. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 880d3db57a252..877019a6d3ea8 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -101,10 +101,9 @@ struct pkcs1pad_inst_ctx { }; struct pkcs1pad_request { - struct akcipher_request child_req; - struct scatterlist in_sg[2], out_sg[1]; uint8_t *in_buf, *out_buf; + struct akcipher_request child_req; }; static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key, From e514cc0a492a3f39ef71b31590a7ef67537ee04b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Jul 2016 14:09:13 +0300 Subject: [PATCH 173/180] crypto: nx - off by one bug in nx_of_update_msc() The props->ap[] array is defined like this: struct alg_props ap[NX_MAX_FC][NX_MAX_MODE][3]; So we can see that if msc->fc and msc->mode are == to NX_MAX_FC or NX_MAX_MODE then we're off by one. Fixes: ae0222b7289d ('powerpc/crypto: nx driver code supporting nx encryption') Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 0794f1cc00182..42f0f229f7f77 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -392,7 +392,7 @@ static void nx_of_update_msc(struct device *dev, ((bytes_so_far + sizeof(struct msc_triplet)) <= lenp) && i < msc->triplets; i++) { - if (msc->fc > NX_MAX_FC || msc->mode > NX_MAX_MODE) { + if (msc->fc >= NX_MAX_FC || msc->mode >= NX_MAX_MODE) { dev_err(dev, "unknown function code/mode " "combo: %d/%d (ignored)\n", msc->fc, msc->mode); From aa6416ee4676e5c235d4c7315340c24457e39430 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Mon, 18 Jul 2016 11:32:24 +0200 Subject: [PATCH 174/180] crypto: marvell - Fix wrong flag used for GFP in mv_cesa_dma_add_iv_op Use the parameter 'gfp_flags' instead of 'flag' as second argument of dma_pool_alloc(). The parameter 'flag' is for the TDMA descriptor, its content has no sense for the allocator. Fixes: bac8e805a30d ("crypto: marvell - Copy IV vectors by DMA...") Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/tdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index b0f9f5050dc8f..86a065bcc1872 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -221,7 +221,7 @@ int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, if (IS_ERR(tdma)) return PTR_ERR(tdma); - iv = dma_pool_alloc(cesa_dev->dma->iv_pool, flags, &dma_handle); + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); if (!iv) return -ENOMEM; From 15226e4804e244c2dd51db2a3a2c2b5c9dd65874 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 18 Jul 2016 18:20:10 +0800 Subject: [PATCH 175/180] crypto: testmgr - Print akcipher algorithm name When an akcipher test fails, we don't know which algorithm failed because the name is not printed. This patch fixes this. Signed-off-by: Herbert Xu --- crypto/testmgr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 769cc2a88040c..5c9d5a5e7b651 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2034,6 +2034,8 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, static int test_akcipher(struct crypto_akcipher *tfm, const char *alg, struct akcipher_testvec *vecs, unsigned int tcount) { + const char *algo = + crypto_tfm_alg_driver_name(crypto_akcipher_tfm(tfm)); int ret, i; for (i = 0; i < tcount; i++) { @@ -2041,8 +2043,8 @@ static int test_akcipher(struct crypto_akcipher *tfm, const char *alg, if (!ret) continue; - pr_err("alg: akcipher: test failed on vector %d, err=%d\n", - i + 1, ret); + pr_err("alg: akcipher: test %d failed for %s, err=%d\n", + i + 1, algo, ret); return ret; } return 0; From 5c562338dea29dc09415f39676e7107fa48ce763 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Jul 2016 00:59:30 +0800 Subject: [PATCH 176/180] crypto: skcipher - Add comment for skcipher_alg->base This patch adds a missing comment for the base parameter in struct skcipher_alg. Reported-by: kbuild test robot Signed-off-by: Herbert Xu --- include/crypto/skcipher.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 59c8f6c593e6b..cc4d98a7892e2 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -115,6 +115,7 @@ struct crypto_skcipher { * IV of exactly that size to perform the encrypt or decrypt operation. * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. + * @base: Definition of a generic crypto algorithm. * * All fields except @ivsize are mandatory and must be filled. */ From 11c6e16ee13ab68b8ff04c17ab41611a4fcc5c81 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Mon, 18 Jul 2016 12:26:25 -0300 Subject: [PATCH 177/180] crypto: vmx - Adding asm subroutines for XTS This patch add XTS subroutines using VMX-crypto driver. It gives a boost of 20 times using XTS. These code has been adopted from OpenSSL project in collaboration with the original author (Andy Polyakov ). Signed-off-by: Leonidas S. Barbosa Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aesp8-ppc.h | 4 + drivers/crypto/vmx/aesp8-ppc.pl | 1865 ++++++++++++++++++++++++++++++- 2 files changed, 1867 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 4cd34ee54a94d..01972e16a6c0b 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len, const struct aes_key *key, const u8 *iv); +void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); +void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 228053921b3f0..813ffcc4d17cc 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -20,6 +27,19 @@ # instructions are interleaved. It's reckoned that eventual # misalignment penalties at page boundaries are in average lower # than additional overhead in pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine, 9x on little- and 12x improvement on big-endian +# systems were measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 $flavour = shift; @@ -1875,6 +1895,1847 @@ () ___ }} }}} +######################################################################### +{{{ # XTS procedures # +# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # +# const AES_KEY *key1, const AES_KEY *key2, # +# [const] unsigned char iv[16]); # +# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # +# input tweak value is assumed to be encrypted already, and last tweak # +# value, one suitable for consecutive call on same chunk of data, is # +# written back to original buffer. In addition, in "tweak chaining" # +# mode only complete input blocks are processed. # + +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; +.globl .${prefix}_xts_encrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_enc_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_enc + +Lxts_enc_no_key2: + li $idx,-16 + and $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_enc: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x + + andi. $taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_enc_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 + mtctr $len + li $len,16 +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + ${UCMP}i $ivp,0 + beq Lxts_enc_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt +.align 5 +.${prefix}_xts_decrypt: + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_dec_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_dec + +Lxts_dec_no_key2: + neg $idx,$len + andi. $idx,$idx,15 + add $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_dec: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + ${UCMP}i $ivp,0 + beq Lxts_dec_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_=$key2; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_enc6x_zero + cmpwi $len,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi $len,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi $taillen,0 + beq Lxts_enc6x_done + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r30,$out,17 + subi $out,$out,16 + mtctr $taillen +Loop_xts_enc6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_enc6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + ${UCMP}i $ivp,0 + beq Lxts_enc6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,r0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 + + subi r30,$out,1 + mtctr $taillen +Loop_xts_dec6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_dec6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_dec1x # one more time... + +.align 4 +Lxts_dec6x_done: + ${UCMP}i $ivp,0 + beq Lxts_dec6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + my $consts=1; foreach(split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/geo; @@ -1898,7 +3759,7 @@ () if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } From c07f5d3da643329f38ff7c2ef2252723453dd9c4 Mon Sep 17 00:00:00 2001 From: "Leonidas S. Barbosa" Date: Mon, 18 Jul 2016 12:26:26 -0300 Subject: [PATCH 178/180] crypto: vmx - Adding support for XTS This patch add XTS support using VMX-crypto driver. Signed-off-by: Leonidas S. Barbosa Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/Makefile | 2 +- drivers/crypto/vmx/aes_xts.c | 190 +++++++++++++++++++++++++++++++++++ drivers/crypto/vmx/vmx.c | 2 + 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/vmx/aes_xts.c diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index d28ab96a24759..de6e241b08666 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o +vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c new file mode 100644 index 0000000000000..cfb25413917c3 --- /dev/null +++ b/drivers/crypto/vmx/aes_xts.c @@ -0,0 +1,190 @@ +/** + * AES XTS routines supporting VMX In-core instructions on Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundations; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY of FITNESS FOR A PARTICUPAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Leonidas S. Barbosa + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "aesp8-ppc.h" + +struct p8_aes_xts_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; + struct aes_key tweak_key; +}; + +static int p8_aes_xts_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_xts_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + ret = xts_check_key(tfm, key, keylen); + if (ret) + return ret; + + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); + ret += aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static int p8_aes_xts_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, int enc) +{ + int ret; + u8 tweak[AES_BLOCK_SIZE]; + u8 *iv; + struct blkcipher_walk walk; + struct p8_aes_xts_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : + crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + + iv = (u8 *)walk.iv; + ret = blkcipher_walk_virt(desc, &walk); + memset(tweak, 0, AES_BLOCK_SIZE); + aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + + while ((nbytes = walk.nbytes)) { + if (enc) + aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); + else + aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } + return ret; +} + +static int p8_aes_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 1); +} + +static int p8_aes_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 0); +} + +struct crypto_alg p8_aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "p8_aes_xts", + .cra_module = THIS_MODULE, + .cra_priority = 2000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_xts_ctx), + .cra_init = p8_aes_xts_init, + .cra_exit = p8_aes_xts_exit, + .cra_blkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = p8_aes_xts_setkey, + .encrypt = p8_aes_xts_encrypt, + .decrypt = p8_aes_xts_decrypt, + } +}; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index e163d5770438a..f688c32fbcc71 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -31,10 +31,12 @@ extern struct shash_alg p8_ghash_alg; extern struct crypto_alg p8_aes_alg; extern struct crypto_alg p8_aes_cbc_alg; extern struct crypto_alg p8_aes_ctr_alg; +extern struct crypto_alg p8_aes_xts_alg; static struct crypto_alg *algs[] = { &p8_aes_alg, &p8_aes_cbc_alg, &p8_aes_ctr_alg, + &p8_aes_xts_alg, NULL, }; From 48a9343216f9c8ff3066cd12c90d161d1d9536d8 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Tue, 19 Jul 2016 10:36:26 -0300 Subject: [PATCH 179/180] crypto: vmx - Ignore generated files Ignore assembly files generated by the perl script. Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 drivers/crypto/vmx/.gitignore diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore new file mode 100644 index 0000000000000..af4a7ce4738d0 --- /dev/null +++ b/drivers/crypto/vmx/.gitignore @@ -0,0 +1,2 @@ +aesp8-ppc.S +ghashp8-ppc.S From 16dee78005be9908197c707393a3b6a61a14b4fb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 20 Jul 2016 22:32:50 +0800 Subject: [PATCH 180/180] crypto: vmx - Fix aes_p8_xts_decrypt build failure We use _GLOBAL so there is no need to do the manual alignment, in fact it causes a build failure. Reported-by: Stephen Rothwell Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aesp8-ppc.pl | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 813ffcc4d17cc..0b4a293b8a1e9 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -2125,8 +2125,6 @@ () .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt .globl .${prefix}_xts_decrypt -.align 5 -.${prefix}_xts_decrypt: mr $inp,r3 # reassign li r3,-1 ${UCMP}i $len,16