From d31e712302c7d5ea47bdb5c1108ed1e33bf8681d Mon Sep 17 00:00:00 2001
From: "Zeng, Xin" <xin.zeng@intel.com>
Date: Fri, 23 Jun 2017 11:31:19 -0400
Subject: [PATCH 001/116] crypto: virtio - Refacotor virtio_crypto driver for
 new virito crypto services

In current virtio crypto device driver, some common data structures and
implementations that should be used by other virtio crypto algorithms
(e.g. asymmetric crypto algorithms) introduce symmetric crypto algorithms
specific implementations.
This patch refactors these pieces of code so that they can be reused by
other virtio crypto algorithms.

Acked-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/virtio/virtio_crypto_algs.c   | 109 +++++++++++++++----
 drivers/crypto/virtio/virtio_crypto_common.h |  22 +---
 drivers/crypto/virtio/virtio_crypto_core.c   |  37 ++-----
 3 files changed, 98 insertions(+), 70 deletions(-)

diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c
index 49defda4e03dd..5035b0dc1e40c 100644
--- a/drivers/crypto/virtio/virtio_crypto_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_algs.c
@@ -27,12 +27,68 @@
 #include <uapi/linux/virtio_crypto.h>
 #include "virtio_crypto_common.h"
 
+
+struct virtio_crypto_ablkcipher_ctx {
+	struct virtio_crypto *vcrypto;
+	struct crypto_tfm *tfm;
+
+	struct virtio_crypto_sym_session_info enc_sess_info;
+	struct virtio_crypto_sym_session_info dec_sess_info;
+};
+
+struct virtio_crypto_sym_request {
+	struct virtio_crypto_request base;
+
+	/* Cipher or aead */
+	uint32_t type;
+	struct virtio_crypto_ablkcipher_ctx *ablkcipher_ctx;
+	struct ablkcipher_request *ablkcipher_req;
+	uint8_t *iv;
+	/* Encryption? */
+	bool encrypt;
+};
+
 /*
  * The algs_lock protects the below global virtio_crypto_active_devs
  * and crypto algorithms registion.
  */
 static DEFINE_MUTEX(algs_lock);
 static unsigned int virtio_crypto_active_devs;
+static void virtio_crypto_ablkcipher_finalize_req(
+	struct virtio_crypto_sym_request *vc_sym_req,
+	struct ablkcipher_request *req,
+	int err);
+
+static void virtio_crypto_dataq_sym_callback
+		(struct virtio_crypto_request *vc_req, int len)
+{
+	struct virtio_crypto_sym_request *vc_sym_req =
+		container_of(vc_req, struct virtio_crypto_sym_request, base);
+	struct ablkcipher_request *ablk_req;
+	int error;
+
+	/* Finish the encrypt or decrypt process */
+	if (vc_sym_req->type == VIRTIO_CRYPTO_SYM_OP_CIPHER) {
+		switch (vc_req->status) {
+		case VIRTIO_CRYPTO_OK:
+			error = 0;
+			break;
+		case VIRTIO_CRYPTO_INVSESS:
+		case VIRTIO_CRYPTO_ERR:
+			error = -EINVAL;
+			break;
+		case VIRTIO_CRYPTO_BADMSG:
+			error = -EBADMSG;
+			break;
+		default:
+			error = -EIO;
+			break;
+		}
+		ablk_req = vc_sym_req->ablkcipher_req;
+		virtio_crypto_ablkcipher_finalize_req(vc_sym_req,
+							ablk_req, error);
+	}
+}
 
 static u64 virtio_crypto_alg_sg_nents_length(struct scatterlist *sg)
 {
@@ -286,13 +342,14 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
 }
 
 static int
-__virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req,
+__virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req,
 		struct ablkcipher_request *req,
 		struct data_queue *data_vq)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct virtio_crypto_ablkcipher_ctx *ctx = vc_sym_req->ablkcipher_ctx;
+	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
-	struct virtio_crypto_ablkcipher_ctx *ctx = vc_req->ablkcipher_ctx;
 	struct virtio_crypto *vcrypto = ctx->vcrypto;
 	struct virtio_crypto_op_data_req *req_data;
 	int src_nents, dst_nents;
@@ -326,9 +383,9 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req,
 	}
 
 	vc_req->req_data = req_data;
-	vc_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER;
+	vc_sym_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER;
 	/* Head of operation */
-	if (vc_req->encrypt) {
+	if (vc_sym_req->encrypt) {
 		req_data->header.session_id =
 			cpu_to_le64(ctx->enc_sess_info.session_id);
 		req_data->header.opcode =
@@ -383,7 +440,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req,
 	memcpy(iv, req->info, ivsize);
 	sg_init_one(&iv_sg, iv, ivsize);
 	sgs[num_out++] = &iv_sg;
-	vc_req->iv = iv;
+	vc_sym_req->iv = iv;
 
 	/* Source data */
 	for (i = 0; i < src_nents; i++)
@@ -421,15 +478,18 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
 {
 	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
 	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm);
-	struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req);
+	struct virtio_crypto_sym_request *vc_sym_req =
+				ablkcipher_request_ctx(req);
+	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	struct virtio_crypto *vcrypto = ctx->vcrypto;
 	/* Use the first data virtqueue as default */
 	struct data_queue *data_vq = &vcrypto->data_vq[0];
 
-	vc_req->ablkcipher_ctx = ctx;
-	vc_req->ablkcipher_req = req;
-	vc_req->encrypt = true;
 	vc_req->dataq = data_vq;
+	vc_req->alg_cb = virtio_crypto_dataq_sym_callback;
+	vc_sym_req->ablkcipher_ctx = ctx;
+	vc_sym_req->ablkcipher_req = req;
+	vc_sym_req->encrypt = true;
 
 	return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
 }
@@ -438,16 +498,18 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
 {
 	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
 	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm);
-	struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req);
+	struct virtio_crypto_sym_request *vc_sym_req =
+				ablkcipher_request_ctx(req);
+	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	struct virtio_crypto *vcrypto = ctx->vcrypto;
 	/* Use the first data virtqueue as default */
 	struct data_queue *data_vq = &vcrypto->data_vq[0];
 
-	vc_req->ablkcipher_ctx = ctx;
-	vc_req->ablkcipher_req = req;
-
-	vc_req->encrypt = false;
 	vc_req->dataq = data_vq;
+	vc_req->alg_cb = virtio_crypto_dataq_sym_callback;
+	vc_sym_req->ablkcipher_ctx = ctx;
+	vc_sym_req->ablkcipher_req = req;
+	vc_sym_req->encrypt = false;
 
 	return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
 }
@@ -456,7 +518,7 @@ static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
 {
 	struct virtio_crypto_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_request);
+	tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_sym_request);
 	ctx->tfm = tfm;
 
 	return 0;
@@ -479,11 +541,13 @@ int virtio_crypto_ablkcipher_crypt_req(
 	struct crypto_engine *engine,
 	struct ablkcipher_request *req)
 {
-	struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req);
+	struct virtio_crypto_sym_request *vc_sym_req =
+				ablkcipher_request_ctx(req);
+	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
 	struct data_queue *data_vq = vc_req->dataq;
 	int ret;
 
-	ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq);
+	ret = __virtio_crypto_ablkcipher_do_req(vc_sym_req, req, data_vq);
 	if (ret < 0)
 		return ret;
 
@@ -492,14 +556,15 @@ int virtio_crypto_ablkcipher_crypt_req(
 	return 0;
 }
 
-void virtio_crypto_ablkcipher_finalize_req(
-	struct virtio_crypto_request *vc_req,
+static void virtio_crypto_ablkcipher_finalize_req(
+	struct virtio_crypto_sym_request *vc_sym_req,
 	struct ablkcipher_request *req,
 	int err)
 {
-	crypto_finalize_cipher_request(vc_req->dataq->engine, req, err);
-
-	virtcrypto_clear_request(vc_req);
+	crypto_finalize_cipher_request(vc_sym_req->base.dataq->engine,
+					req, err);
+	kzfree(vc_sym_req->iv);
+	virtcrypto_clear_request(&vc_sym_req->base);
 }
 
 static struct crypto_alg virtio_crypto_algs[] = { {
diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
index da6d8c0ea407a..e976539a05d92 100644
--- a/drivers/crypto/virtio/virtio_crypto_common.h
+++ b/drivers/crypto/virtio/virtio_crypto_common.h
@@ -83,26 +83,16 @@ struct virtio_crypto_sym_session_info {
 	__u64 session_id;
 };
 
-struct virtio_crypto_ablkcipher_ctx {
-	struct virtio_crypto *vcrypto;
-	struct crypto_tfm *tfm;
-
-	struct virtio_crypto_sym_session_info enc_sess_info;
-	struct virtio_crypto_sym_session_info dec_sess_info;
-};
+struct virtio_crypto_request;
+typedef void (*virtio_crypto_data_callback)
+		(struct virtio_crypto_request *vc_req, int len);
 
 struct virtio_crypto_request {
-	/* Cipher or aead */
-	uint32_t type;
 	uint8_t status;
-	struct virtio_crypto_ablkcipher_ctx *ablkcipher_ctx;
-	struct ablkcipher_request *ablkcipher_req;
 	struct virtio_crypto_op_data_req *req_data;
 	struct scatterlist **sgs;
-	uint8_t *iv;
-	/* Encryption? */
-	bool encrypt;
 	struct data_queue *dataq;
+	virtio_crypto_data_callback alg_cb;
 };
 
 int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev);
@@ -119,10 +109,6 @@ void virtcrypto_dev_stop(struct virtio_crypto *vcrypto);
 int virtio_crypto_ablkcipher_crypt_req(
 	struct crypto_engine *engine,
 	struct ablkcipher_request *req);
-void virtio_crypto_ablkcipher_finalize_req(
-	struct virtio_crypto_request *vc_req,
-	struct ablkcipher_request *req,
-	int err);
 
 void
 virtcrypto_clear_request(struct virtio_crypto_request *vc_req);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index a111cd72797b1..ff1410a32c2bb 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -29,7 +29,6 @@ void
 virtcrypto_clear_request(struct virtio_crypto_request *vc_req)
 {
 	if (vc_req) {
-		kzfree(vc_req->iv);
 		kzfree(vc_req->req_data);
 		kfree(vc_req->sgs);
 	}
@@ -41,40 +40,18 @@ static void virtcrypto_dataq_callback(struct virtqueue *vq)
 	struct virtio_crypto_request *vc_req;
 	unsigned long flags;
 	unsigned int len;
-	struct ablkcipher_request *ablk_req;
-	int error;
 	unsigned int qid = vq->index;
 
 	spin_lock_irqsave(&vcrypto->data_vq[qid].lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vc_req = virtqueue_get_buf(vq, &len)) != NULL) {
-			if (vc_req->type == VIRTIO_CRYPTO_SYM_OP_CIPHER) {
-				switch (vc_req->status) {
-				case VIRTIO_CRYPTO_OK:
-					error = 0;
-					break;
-				case VIRTIO_CRYPTO_INVSESS:
-				case VIRTIO_CRYPTO_ERR:
-					error = -EINVAL;
-					break;
-				case VIRTIO_CRYPTO_BADMSG:
-					error = -EBADMSG;
-					break;
-				default:
-					error = -EIO;
-					break;
-				}
-				ablk_req = vc_req->ablkcipher_req;
-
-				spin_unlock_irqrestore(
-					&vcrypto->data_vq[qid].lock, flags);
-				/* Finish the encrypt or decrypt process */
-				virtio_crypto_ablkcipher_finalize_req(vc_req,
-				    ablk_req, error);
-				spin_lock_irqsave(
-					&vcrypto->data_vq[qid].lock, flags);
-			}
+			spin_unlock_irqrestore(
+				&vcrypto->data_vq[qid].lock, flags);
+			if (vc_req->alg_cb)
+				vc_req->alg_cb(vc_req, len);
+			spin_lock_irqsave(
+				&vcrypto->data_vq[qid].lock, flags);
 		}
 	} while (!virtqueue_enable_cb(vq));
 	spin_unlock_irqrestore(&vcrypto->data_vq[qid].lock, flags);
@@ -270,7 +247,7 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto)
 
 			return -EPERM;
 		}
-		dev_info(&vcrypto->vdev->dev, "Accelerator is ready\n");
+		dev_info(&vcrypto->vdev->dev, "Accelerator device is ready\n");
 	} else {
 		virtcrypto_dev_stop(vcrypto);
 		dev_info(&vcrypto->vdev->dev, "Accelerator is not ready\n");

From a93854c9a596871f6740c26157c4a394aca76817 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Jun 2017 20:41:03 +0100
Subject: [PATCH 002/116] crypto: qat - fix spelling mistake: "runing" ->
 "running"

trivial fix to spelling mistake in dev_info message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/adf_aer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index d3e25c37dc339..da8a2d3b5e9a0 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -208,7 +208,7 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
 static void adf_resume(struct pci_dev *pdev)
 {
 	dev_info(&pdev->dev, "Acceleration driver reset completed\n");
-	dev_info(&pdev->dev, "Device is up and runnig\n");
+	dev_info(&pdev->dev, "Device is up and running\n");
 }
 
 static const struct pci_error_handlers adf_err_handler = {

From 30aabe36d3db22791c5b6260b61fe451e77e0edd Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 27 Jun 2017 17:11:23 +0530
Subject: [PATCH 003/116] crypto: sahara - make of_device_ids const

of_device_ids are not supposed to change at runtime. All functions
working with of_device_ids provided by <linux/of.h> work with const
of_device_ids. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   9759	   2736	      8	  12503	   30d7	drivers/crypto/sahara.o

File size after constify:
   text	   data	    bss	    dec	    hex	filename
  10367	   2128	      8	  12503	   30d7	drivers/crypto/sahara.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sahara.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 1d9ecd368b5bb..c2174ec88e2a8 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1382,7 +1382,7 @@ static struct platform_device_id sahara_platform_ids[] = {
 };
 MODULE_DEVICE_TABLE(platform, sahara_platform_ids);
 
-static struct of_device_id sahara_dt_ids[] = {
+static const struct of_device_id sahara_dt_ids[] = {
 	{ .compatible = "fsl,imx53-sahara" },
 	{ .compatible = "fsl,imx27-sahara" },
 	{ /* sentinel */ }

From 77af0ae44e4a74f58dd741babdacec32fc8042b1 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 27 Jun 2017 08:58:04 -0500
Subject: [PATCH 004/116] crypto: ccp - Fix some line spacing

Add/remove blank lines as appropriate.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev.c | 1 +
 include/linux/ccp.h          | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 2506b50257008..67cbb3e76888d 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -415,6 +415,7 @@ static void ccp_do_cmd_complete(unsigned long data)
 	struct ccp_cmd *cmd = tdata->cmd;
 
 	cmd->callback(cmd->data, cmd->ret);
+
 	complete(&tdata->completion);
 }
 
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index 3285c944194ad..c03ee844a99d7 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -20,7 +20,6 @@
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 
-
 struct ccp_device;
 struct ccp_cmd;
 

From bce386af148c85f1cd19bb9bfa04ec92a947e473 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 27 Jun 2017 08:58:16 -0500
Subject: [PATCH 005/116] crypto: ccp - Change all references to use the JOB ID
 macro

Use the CCP_NEW_JOBID() macro when assigning an identifier

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-ops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index c0dfdacbdff52..78f29d459df87 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1756,7 +1756,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 	op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
 
 	if (!op.sb_key)
@@ -1992,7 +1992,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 		/* Load the mask */

From 33460b2dd13f7b4845364c59fccb79c778d7c5e0 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Wed, 28 Jun 2017 11:56:47 -0500
Subject: [PATCH 006/116] crypto: ccp - Provide an error path for debugfs setup
 failure

Changes since v2:
  - On failure remove only the DebugFS heirarchy for this device
Changes since v1:
  - Remove unneeded local variable

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-debugfs.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c
index 3cd6c83754e08..59d4ca4e72d8c 100644
--- a/drivers/crypto/ccp/ccp-debugfs.c
+++ b/drivers/crypto/ccp/ccp-debugfs.c
@@ -305,19 +305,19 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 
 	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
 	if (!ccp->debugfs_instance)
-		return;
+		goto err;
 
 	debugfs_info = debugfs_create_file("info", 0400,
 					   ccp->debugfs_instance, ccp,
 					   &ccp_debugfs_info_ops);
 	if (!debugfs_info)
-		return;
+		goto err;
 
 	debugfs_stats = debugfs_create_file("stats", 0600,
 					    ccp->debugfs_instance, ccp,
 					    &ccp_debugfs_stats_ops);
 	if (!debugfs_stats)
-		return;
+		goto err;
 
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		cmd_q = &ccp->cmd_q[i];
@@ -327,15 +327,20 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 		debugfs_q_instance =
 			debugfs_create_dir(name, ccp->debugfs_instance);
 		if (!debugfs_q_instance)
-			return;
+			goto err;
 
 		debugfs_q_stats =
 			debugfs_create_file("stats", 0600,
 					    debugfs_q_instance, cmd_q,
 					    &ccp_debugfs_queue_ops);
 		if (!debugfs_q_stats)
-			return;
+			goto err;
 	}
+
+	return;
+
+err:
+	debugfs_remove_recursive(ccp->debugfs_instance);
 }
 
 void ccp5_debugfs_destroy(void)

From 28a2cc671989264844f989e09a7bb83398fd0dd4 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 00:59:52 -0500
Subject: [PATCH 007/116] crypto: ccp - print error message on platform_get_irq
 failure

Print error message on platform_get_irq failure before return.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-platform.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index e26969e601ad6..6020c4ae72fcd 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -66,8 +66,10 @@ static int ccp_get_irq(struct ccp_device *ccp)
 	int ret;
 
 	ret = platform_get_irq(pdev, 0);
-	if (ret < 0)
+	if (ret < 0) {
+		dev_notice(dev, "unable to get IRQ (%d)\n", ret);
 		return ret;
+	}
 
 	ccp->irq = ret;
 	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,

From 0a7d710de12a2a9bad195edc318729e0ebdcc512 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 01:24:54 -0500
Subject: [PATCH 008/116] crypto: mediatek - fix error return code in
 mtk_crypto_probe()

Propagate the return value of platform_get_irq on failure.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/mediatek/mtk-platform.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c
index 000b6500a22d2..b182e941b0cd8 100644
--- a/drivers/crypto/mediatek/mtk-platform.c
+++ b/drivers/crypto/mediatek/mtk-platform.c
@@ -500,7 +500,7 @@ static int mtk_crypto_probe(struct platform_device *pdev)
 		cryp->irq[i] = platform_get_irq(pdev, i);
 		if (cryp->irq[i] < 0) {
 			dev_err(cryp->dev, "no IRQ:%d resource info\n", i);
-			return -ENXIO;
+			return cryp->irq[i];
 		}
 	}
 

From 5a19dc5bde0819886093c9d42b0424405a343516 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 01:42:12 -0500
Subject: [PATCH 009/116] crypto: mxc-scc - fix error code in mxc_scc_probe()

Print and propagate the return value of platform_get_irq on failure.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/mxc-scc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/mxc-scc.c b/drivers/crypto/mxc-scc.c
index ee4be1b0d30ba..e01c46387df8d 100644
--- a/drivers/crypto/mxc-scc.c
+++ b/drivers/crypto/mxc-scc.c
@@ -708,8 +708,8 @@ static int mxc_scc_probe(struct platform_device *pdev)
 	for (i = 0; i < 2; i++) {
 		irq = platform_get_irq(pdev, i);
 		if (irq < 0) {
-			dev_err(dev, "failed to get irq resource\n");
-			ret = -EINVAL;
+			dev_err(dev, "failed to get irq resource: %d\n", irq);
+			ret = irq;
 			goto err_out;
 		}
 

From 353ef083970812168dc0990e356e93e203b2cb92 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 01:54:16 -0500
Subject: [PATCH 010/116] crypto: mxs-dcp - print error message on
 platform_get_irq failure

Print error message on platform_get_irq failure before return.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/mxs-dcp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 625ee50fd78bf..764be3e6933c8 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -908,12 +908,16 @@ static int mxs_dcp_probe(struct platform_device *pdev)
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dcp_vmi_irq = platform_get_irq(pdev, 0);
-	if (dcp_vmi_irq < 0)
+	if (dcp_vmi_irq < 0) {
+		dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_vmi_irq);
 		return dcp_vmi_irq;
+	}
 
 	dcp_irq = platform_get_irq(pdev, 1);
-	if (dcp_irq < 0)
+	if (dcp_irq < 0) {
+		dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_irq);
 		return dcp_irq;
+	}
 
 	sdcp = devm_kzalloc(dev, sizeof(*sdcp), GFP_KERNEL);
 	if (!sdcp)

From 62c58f8d6883e06e113d4a8f22c37143931e8857 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 02:00:54 -0500
Subject: [PATCH 011/116] crypto: omap-aes - fix error return code in
 omap_aes_probe()

Propagate the return value of platform_get_irq on failure.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-aes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 5120a17731d0c..c376a3ee7c2c3 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1095,6 +1095,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 		irq = platform_get_irq(pdev, 0);
 		if (irq < 0) {
 			dev_err(dev, "can't get IRQ resource\n");
+			err = irq;
 			goto err_irq;
 		}
 

From 3822c331c5be580d8f9f65bdd6c0c187e0c5b503 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 02:07:04 -0500
Subject: [PATCH 012/116] crypto: omap-des - fix error return code in
 omap_des_probe()

Print and propagate the return value of platform_get_irq on failure.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-des.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index 0bcab00e0ff52..d37c9506c36c8 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -1023,7 +1023,8 @@ static int omap_des_probe(struct platform_device *pdev)
 
 		irq = platform_get_irq(pdev, 0);
 		if (irq < 0) {
-			dev_err(dev, "can't get IRQ resource\n");
+			dev_err(dev, "can't get IRQ resource: %d\n", irq);
+			err = irq;
 			goto err_irq;
 		}
 

From b8ae5c7387ad075ee61e8c8774ce2bca46bc9236 Mon Sep 17 00:00:00 2001
From: Corentin LABBE <clabbe.montjoie@gmail.com>
Date: Mon, 3 Jul 2017 20:48:48 +0200
Subject: [PATCH 013/116] crypto: sun4i-ss - support the Security System PRNG

The Security System has a PRNG, this patch adds support for it via
crypto_rng.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig                  |  8 ++++
 drivers/crypto/sunxi-ss/Makefile        |  1 +
 drivers/crypto/sunxi-ss/sun4i-ss-core.c | 30 +++++++++++++
 drivers/crypto/sunxi-ss/sun4i-ss-prng.c | 56 +++++++++++++++++++++++++
 drivers/crypto/sunxi-ss/sun4i-ss.h      | 11 +++++
 5 files changed, 106 insertions(+)
 create mode 100644 drivers/crypto/sunxi-ss/sun4i-ss-prng.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4b75084fabad2..5db825a469dea 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -616,6 +616,14 @@ config CRYPTO_DEV_SUN4I_SS
 	  To compile this driver as a module, choose M here: the module
 	  will be called sun4i-ss.
 
+config CRYPTO_DEV_SUN4I_SS_PRNG
+	bool "Support for Allwinner Security System PRNG"
+	depends on CRYPTO_DEV_SUN4I_SS
+	select CRYPTO_RNG
+	help
+	  Select this option if you want to provide kernel-side support for
+	  the Pseudo-Random Number Generator found in the Security System.
+
 config CRYPTO_DEV_ROCKCHIP
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
diff --git a/drivers/crypto/sunxi-ss/Makefile b/drivers/crypto/sunxi-ss/Makefile
index 8f4c7a273141a..ccb8932190793 100644
--- a/drivers/crypto/sunxi-ss/Makefile
+++ b/drivers/crypto/sunxi-ss/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sun4i-ss.o
 sun4i-ss-y += sun4i-ss-core.o sun4i-ss-hash.o sun4i-ss-cipher.o
+sun4i-ss-$(CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG) += sun4i-ss-prng.o
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
index 02ad8256e9002..1547cbe13dc2d 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
@@ -213,6 +213,23 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 		}
 	}
 },
+#ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG
+{
+	.type = CRYPTO_ALG_TYPE_RNG,
+	.alg.rng = {
+		.base = {
+			.cra_name		= "stdrng",
+			.cra_driver_name	= "sun4i_ss_rng",
+			.cra_priority		= 300,
+			.cra_ctxsize		= 0,
+			.cra_module		= THIS_MODULE,
+		},
+		.generate               = sun4i_ss_prng_generate,
+		.seed                   = sun4i_ss_prng_seed,
+		.seedsize               = SS_SEED_LEN / BITS_PER_BYTE,
+	}
+},
+#endif
 };
 
 static int sun4i_ss_probe(struct platform_device *pdev)
@@ -355,6 +372,13 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 				goto error_alg;
 			}
 			break;
+		case CRYPTO_ALG_TYPE_RNG:
+			err = crypto_register_rng(&ss_algs[i].alg.rng);
+			if (err) {
+				dev_err(ss->dev, "Fail to register %s\n",
+					ss_algs[i].alg.rng.base.cra_name);
+			}
+			break;
 		}
 	}
 	platform_set_drvdata(pdev, ss);
@@ -369,6 +393,9 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 		case CRYPTO_ALG_TYPE_AHASH:
 			crypto_unregister_ahash(&ss_algs[i].alg.hash);
 			break;
+		case CRYPTO_ALG_TYPE_RNG:
+			crypto_unregister_rng(&ss_algs[i].alg.rng);
+			break;
 		}
 	}
 	if (ss->reset)
@@ -393,6 +420,9 @@ static int sun4i_ss_remove(struct platform_device *pdev)
 		case CRYPTO_ALG_TYPE_AHASH:
 			crypto_unregister_ahash(&ss_algs[i].alg.hash);
 			break;
+		case CRYPTO_ALG_TYPE_RNG:
+			crypto_unregister_rng(&ss_algs[i].alg.rng);
+			break;
 		}
 	}
 
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
new file mode 100644
index 0000000000000..0d01d16242527
--- /dev/null
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
@@ -0,0 +1,56 @@
+#include "sun4i-ss.h"
+
+int sun4i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed,
+		       unsigned int slen)
+{
+	struct sun4i_ss_alg_template *algt;
+	struct rng_alg *alg = crypto_rng_alg(tfm);
+
+	algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
+	memcpy(algt->ss->seed, seed, slen);
+
+	return 0;
+}
+
+int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int dlen)
+{
+	struct sun4i_ss_alg_template *algt;
+	struct rng_alg *alg = crypto_rng_alg(tfm);
+	int i;
+	u32 v;
+	u32 *data = (u32 *)dst;
+	const u32 mode = SS_OP_PRNG | SS_PRNG_CONTINUE | SS_ENABLED;
+	size_t len;
+	struct sun4i_ss_ctx *ss;
+	unsigned int todo = (dlen / 4) * 4;
+
+	algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
+	ss = algt->ss;
+
+	spin_lock(&ss->slock);
+
+	writel(mode, ss->base + SS_CTL);
+
+	while (todo > 0) {
+		/* write the seed */
+		for (i = 0; i < SS_SEED_LEN / BITS_PER_LONG; i++)
+			writel(ss->seed[i], ss->base + SS_KEY0 + i * 4);
+
+		/* Read the random data */
+		len = min_t(size_t, SS_DATA_LEN / BITS_PER_BYTE, todo);
+		readsl(ss->base + SS_TXFIFO, data, len / 4);
+		data += len / 4;
+		todo -= len;
+
+		/* Update the seed */
+		for (i = 0; i < SS_SEED_LEN / BITS_PER_LONG; i++) {
+			v = readl(ss->base + SS_KEY0 + i * 4);
+			ss->seed[i] = v;
+		}
+	}
+
+	writel(0, ss->base + SS_CTL);
+	spin_unlock(&ss->slock);
+	return dlen;
+}
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/sunxi-ss/sun4i-ss.h
index a0e1efc1cb2a1..f3ac90692ac6d 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss.h
+++ b/drivers/crypto/sunxi-ss/sun4i-ss.h
@@ -32,6 +32,7 @@
 #include <crypto/aes.h>
 #include <crypto/des.h>
 #include <crypto/internal/rng.h>
+#include <crypto/rng.h>
 
 #define SS_CTL            0x00
 #define SS_KEY0           0x04
@@ -127,6 +128,9 @@
 #define SS_RXFIFO_EMP_INT_ENABLE	(1 << 2)
 #define SS_TXFIFO_AVA_INT_ENABLE	(1 << 0)
 
+#define SS_SEED_LEN 192
+#define SS_DATA_LEN 160
+
 struct sun4i_ss_ctx {
 	void __iomem *base;
 	int irq;
@@ -136,6 +140,9 @@ struct sun4i_ss_ctx {
 	struct device *dev;
 	struct resource *res;
 	spinlock_t slock; /* control the use of the device */
+#ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG
+	u32 seed[SS_SEED_LEN / BITS_PER_LONG];
+#endif
 };
 
 struct sun4i_ss_alg_template {
@@ -144,6 +151,7 @@ struct sun4i_ss_alg_template {
 	union {
 		struct skcipher_alg crypto;
 		struct ahash_alg hash;
+		struct rng_alg rng;
 	} alg;
 	struct sun4i_ss_ctx *ss;
 };
@@ -201,3 +209,6 @@ int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen);
 int sun4i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			 unsigned int keylen);
+int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int dlen);
+int sun4i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, unsigned int slen);

From 66d3994c673d211a9ce5da3d7576d90c8e0cd377 Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 5 Jul 2017 13:07:58 +0300
Subject: [PATCH 014/116] crypto: kpp - add get/set_flags helpers

These helpers will be used for fallbacks to kpp software
implementations.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/kpp.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h
index 2133d17b7156e..1bde0a6514fa6 100644
--- a/include/crypto/kpp.h
+++ b/include/crypto/kpp.h
@@ -145,6 +145,16 @@ static inline struct crypto_kpp *crypto_kpp_reqtfm(struct kpp_request *req)
 	return __crypto_kpp_tfm(req->base.tfm);
 }
 
+static inline u32 crypto_kpp_get_flags(struct crypto_kpp *tfm)
+{
+	return crypto_tfm_get_flags(crypto_kpp_tfm(tfm));
+}
+
+static inline void crypto_kpp_set_flags(struct crypto_kpp *tfm, u32 flags)
+{
+	crypto_tfm_set_flags(crypto_kpp_tfm(tfm), flags);
+}
+
 /**
  * crypto_free_kpp() - free KPP tfm handle
  *

From 11105693fa05f499532b330da65c78ff93ed4440 Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 5 Jul 2017 13:07:59 +0300
Subject: [PATCH 015/116] crypto: atmel-ecc - introduce Microchip / Atmel ECC
 driver

Add ECDH support for ATECC508A (I2C) device.

The device features hardware acceleration for the NIST standard
P256 prime curve and supports the complete key life cycle from
private key generation to ECDH key agreement.

Random private key generation is supported internally within
the device to ensure that the private key can never be known
outside of the device. If the user wants to use its own private
keys, the driver will fallback to the ecdh software implementation.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../bindings/crypto/atmel-crypto.txt          |  13 +
 drivers/crypto/Kconfig                        |  14 +
 drivers/crypto/Makefile                       |   1 +
 drivers/crypto/atmel-ecc.c                    | 781 ++++++++++++++++++
 drivers/crypto/atmel-ecc.h                    | 128 +++
 5 files changed, 937 insertions(+)
 create mode 100644 drivers/crypto/atmel-ecc.c
 create mode 100644 drivers/crypto/atmel-ecc.h

diff --git a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
index f2aab3dc2b523..7de1a9674c707 100644
--- a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
+++ b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
@@ -66,3 +66,16 @@ sha@f8034000 {
 	dmas = <&dma1 2 17>;
 	dma-names = "tx";
 };
+
+* Eliptic Curve Cryptography (I2C)
+
+Required properties:
+- compatible : must be "atmel,atecc508a".
+- reg: I2C bus address of the device.
+- clock-frequency: must be present in the i2c controller node.
+
+Example:
+atecc508a@C0 {
+	compatible = "atmel,atecc508a";
+	reg = <0xC0>;
+};
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 5db825a469dea..49397a9ae67c2 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -525,6 +525,20 @@ config CRYPTO_DEV_ATMEL_SHA
 	  To compile this driver as a module, choose M here: the module
 	  will be called atmel-sha.
 
+config CRYPTO_DEV_ATMEL_ECC
+	tristate "Support for Microchip / Atmel ECC hw accelerator"
+	depends on ARCH_AT91 || COMPILE_TEST
+	depends on I2C
+	select CRYPTO_ECDH
+	select CRC16
+	help
+	  Microhip / Atmel ECC hw accelerator.
+	  Select this if you want to use the Microchip / Atmel module for
+	  ECDH algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-ecc.
+
 config CRYPTO_DEV_CCP
 	bool "Support for AMD Cryptographic Coprocessor"
 	depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 2c555a3393b27..de629165fde7d 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
 obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
 obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
new file mode 100644
index 0000000000000..66ab1021eba58
--- /dev/null
+++ b/drivers/crypto/atmel-ecc.c
@@ -0,0 +1,781 @@
+/*
+ * Microchip / Atmel ECC (I2C) driver.
+ *
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitrev.h>
+#include <linux/crc16.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/ecdh.h>
+#include <crypto/kpp.h>
+#include "atmel-ecc.h"
+
+/* Used for binding tfm objects to i2c clients. */
+struct atmel_ecc_driver_data {
+	struct list_head i2c_client_list;
+	spinlock_t i2c_list_lock;
+} ____cacheline_aligned;
+
+static struct atmel_ecc_driver_data driver_data;
+
+/**
+ * atmel_ecc_i2c_client_priv - i2c_client private data
+ * @client              : pointer to i2c client device
+ * @i2c_client_list_node: part of i2c_client_list
+ * @lock                : lock for sending i2c commands
+ * @wake_token          : wake token array of zeros
+ * @wake_token_sz       : size in bytes of the wake_token
+ * @tfm_count           : number of active crypto transformations on i2c client
+ *
+ * Reads and writes from/to the i2c client are sequential. The first byte
+ * transmitted to the device is treated as the byte size. Any attempt to send
+ * more than this number of bytes will cause the device to not ACK those bytes.
+ * After the host writes a single command byte to the input buffer, reads are
+ * prohibited until after the device completes command execution. Use a mutex
+ * when sending i2c commands.
+ */
+struct atmel_ecc_i2c_client_priv {
+	struct i2c_client *client;
+	struct list_head i2c_client_list_node;
+	struct mutex lock;
+	u8 wake_token[WAKE_TOKEN_MAX_SIZE];
+	size_t wake_token_sz;
+	atomic_t tfm_count ____cacheline_aligned;
+};
+
+/**
+ * atmel_ecdh_ctx - transformation context
+ * @client     : pointer to i2c client device
+ * @fallback   : used for unsupported curves or when user wants to use its own
+ *               private key.
+ * @public_key : generated when calling set_secret(). It's the responsibility
+ *               of the user to not call set_secret() while
+ *               generate_public_key() or compute_shared_secret() are in flight.
+ * @curve_id   : elliptic curve id
+ * @n_sz       : size in bytes of the n prime
+ * @do_fallback: true when the device doesn't support the curve or when the user
+ *               wants to use its own private key.
+ */
+struct atmel_ecdh_ctx {
+	struct i2c_client *client;
+	struct crypto_kpp *fallback;
+	const u8 *public_key;
+	unsigned int curve_id;
+	size_t n_sz;
+	bool do_fallback;
+};
+
+/**
+ * atmel_ecc_work_data - data structure representing the work
+ * @ctx : transformation context.
+ * @cbk : pointer to a callback function to be invoked upon completion of this
+ *        request. This has the form:
+ *        callback(struct atmel_ecc_work_data *work_data, void *areq, u8 status)
+ *        where:
+ *        @work_data: data structure representing the work
+ *        @areq     : optional pointer to an argument passed with the original
+ *                    request.
+ *        @status   : status returned from the device.
+ * @areq: optional pointer to a user argument for use at callback time.
+ * @work: describes the task to be executed.
+ * @cmd : structure used for communicating with the device.
+ */
+struct atmel_ecc_work_data {
+	struct atmel_ecdh_ctx *ctx;
+	void (*cbk)(struct atmel_ecc_work_data *work_data, void *areq,
+		    u8 status);
+	void *areq;
+	struct work_struct work;
+	struct atmel_ecc_cmd cmd;
+};
+
+static u16 atmel_ecc_crc16(u16 crc, const u8 *buffer, size_t len)
+{
+	return cpu_to_le16(bitrev16(crc16(crc, buffer, len)));
+}
+
+/**
+ * atmel_ecc_checksum() - Generate 16-bit CRC as required by ATMEL ECC.
+ * CRC16 verification of the count, opcode, param1, param2 and data bytes.
+ * The checksum is saved in little-endian format in the least significant
+ * two bytes of the command. CRC polynomial is 0x8005 and the initial register
+ * value should be zero.
+ *
+ * @cmd : structure used for communicating with the device.
+ */
+static void atmel_ecc_checksum(struct atmel_ecc_cmd *cmd)
+{
+	u8 *data = &cmd->count;
+	size_t len = cmd->count - CRC_SIZE;
+	u16 *crc16 = (u16 *)(data + len);
+
+	*crc16 = atmel_ecc_crc16(0, data, len);
+}
+
+static void atmel_ecc_init_read_cmd(struct atmel_ecc_cmd *cmd)
+{
+	cmd->word_addr = COMMAND;
+	cmd->opcode = OPCODE_READ;
+	/*
+	 * Read the word from Configuration zone that contains the lock bytes
+	 * (UserExtra, Selector, LockValue, LockConfig).
+	 */
+	cmd->param1 = CONFIG_ZONE;
+	cmd->param2 = DEVICE_LOCK_ADDR;
+	cmd->count = READ_COUNT;
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_READ;
+	cmd->rxsize = READ_RSP_SIZE;
+}
+
+static void atmel_ecc_init_genkey_cmd(struct atmel_ecc_cmd *cmd, u16 keyid)
+{
+	cmd->word_addr = COMMAND;
+	cmd->count = GENKEY_COUNT;
+	cmd->opcode = OPCODE_GENKEY;
+	cmd->param1 = GENKEY_MODE_PRIVATE;
+	/* a random private key will be generated and stored in slot keyID */
+	cmd->param2 = cpu_to_le16(keyid);
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_GENKEY;
+	cmd->rxsize = GENKEY_RSP_SIZE;
+}
+
+static int atmel_ecc_init_ecdh_cmd(struct atmel_ecc_cmd *cmd,
+				   struct scatterlist *pubkey)
+{
+	size_t copied;
+
+	cmd->word_addr = COMMAND;
+	cmd->count = ECDH_COUNT;
+	cmd->opcode = OPCODE_ECDH;
+	cmd->param1 = ECDH_PREFIX_MODE;
+	/* private key slot */
+	cmd->param2 = cpu_to_le16(DATA_SLOT_2);
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	copied = sg_copy_to_buffer(pubkey, 1, cmd->data, ATMEL_ECC_PUBKEY_SIZE);
+	if (copied != ATMEL_ECC_PUBKEY_SIZE)
+		return -EINVAL;
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_ECDH;
+	cmd->rxsize = ECDH_RSP_SIZE;
+
+	return 0;
+}
+
+/*
+ * After wake and after execution of a command, there will be error, status, or
+ * result bytes in the device's output register that can be retrieved by the
+ * system. When the length of that group is four bytes, the codes returned are
+ * detailed in error_list.
+ */
+static int atmel_ecc_status(struct device *dev, u8 *status)
+{
+	size_t err_list_len = ARRAY_SIZE(error_list);
+	int i;
+	u8 err_id = status[1];
+
+	if (*status != STATUS_SIZE)
+		return 0;
+
+	if (err_id == STATUS_WAKE_SUCCESSFUL || err_id == STATUS_NOERR)
+		return 0;
+
+	for (i = 0; i < err_list_len; i++)
+		if (error_list[i].value == err_id)
+			break;
+
+	/* if err_id is not in the error_list then ignore it */
+	if (i != err_list_len) {
+		dev_err(dev, "%02x: %s:\n", err_id, error_list[i].error_text);
+		return err_id;
+	}
+
+	return 0;
+}
+
+static int atmel_ecc_wakeup(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+	u8 status[STATUS_RSP_SIZE];
+	int ret;
+
+	/*
+	 * The device ignores any levels or transitions on the SCL pin when the
+	 * device is idle, asleep or during waking up. Don't check for error
+	 * when waking up the device.
+	 */
+	i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz);
+
+	/*
+	 * Wait to wake the device. Typical execution times for ecdh and genkey
+	 * are around tens of milliseconds. Delta is chosen to 50 microseconds.
+	 */
+	usleep_range(TWHI_MIN, TWHI_MAX);
+
+	ret = i2c_master_recv(client, status, STATUS_SIZE);
+	if (ret < 0)
+		return ret;
+
+	return atmel_ecc_status(&client->dev, status);
+}
+
+static int atmel_ecc_sleep(struct i2c_client *client)
+{
+	u8 sleep = SLEEP_TOKEN;
+
+	return i2c_master_send(client, &sleep, 1);
+}
+
+static void atmel_ecdh_done(struct atmel_ecc_work_data *work_data, void *areq,
+			    u8 status)
+{
+	struct kpp_request *req = areq;
+	struct atmel_ecdh_ctx *ctx = work_data->ctx;
+	struct atmel_ecc_cmd *cmd = &work_data->cmd;
+	size_t copied;
+	size_t n_sz = ctx->n_sz;
+
+	if (status)
+		goto free_work_data;
+
+	/* copy the shared secret */
+	copied = sg_copy_from_buffer(req->dst, 1, &cmd->data[RSP_DATA_IDX],
+				     n_sz);
+	if (copied != n_sz)
+		status = -EINVAL;
+
+	/* fall through */
+free_work_data:
+	kzfree(work_data);
+	kpp_request_complete(req, status);
+}
+
+/*
+ * atmel_ecc_send_receive() - send a command to the device and receive its
+ *                            response.
+ * @client: i2c client device
+ * @cmd   : structure used to communicate with the device
+ *
+ * After the device receives a Wake token, a watchdog counter starts within the
+ * device. After the watchdog timer expires, the device enters sleep mode
+ * regardless of whether some I/O transmission or command execution is in
+ * progress. If a command is attempted when insufficient time remains prior to
+ * watchdog timer execution, the device will return the watchdog timeout error
+ * code without attempting to execute the command. There is no way to reset the
+ * counter other than to put the device into sleep or idle mode and then
+ * wake it up again.
+ */
+static int atmel_ecc_send_receive(struct i2c_client *client,
+				  struct atmel_ecc_cmd *cmd)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+	int ret;
+
+	mutex_lock(&i2c_priv->lock);
+
+	ret = atmel_ecc_wakeup(client);
+	if (ret)
+		goto err;
+
+	/* send the command */
+	ret = i2c_master_send(client, (u8 *)cmd, cmd->count + WORD_ADDR_SIZE);
+	if (ret < 0)
+		goto err;
+
+	/* delay the appropriate amount of time for command to execute */
+	msleep(cmd->msecs);
+
+	/* receive the response */
+	ret = i2c_master_recv(client, cmd->data, cmd->rxsize);
+	if (ret < 0)
+		goto err;
+
+	/* put the device into low-power mode */
+	ret = atmel_ecc_sleep(client);
+	if (ret < 0)
+		goto err;
+
+	mutex_unlock(&i2c_priv->lock);
+	return atmel_ecc_status(&client->dev, cmd->data);
+err:
+	mutex_unlock(&i2c_priv->lock);
+	return ret;
+}
+
+static void atmel_ecc_work_handler(struct work_struct *work)
+{
+	struct atmel_ecc_work_data *work_data =
+			container_of(work, struct atmel_ecc_work_data, work);
+	struct atmel_ecc_cmd *cmd = &work_data->cmd;
+	struct i2c_client *client = work_data->ctx->client;
+	u8 status;
+
+	status = atmel_ecc_send_receive(client, cmd);
+	work_data->cbk(work_data, work_data->areq, status);
+}
+
+static void atmel_ecc_enqueue(struct atmel_ecc_work_data *work_data,
+			      void (*cbk)(struct atmel_ecc_work_data *work_data,
+					  void *areq, u8 status),
+			      void *areq)
+{
+	work_data->cbk = (void *)cbk;
+	work_data->areq = areq;
+
+	INIT_WORK(&work_data->work, atmel_ecc_work_handler);
+	schedule_work(&work_data->work);
+}
+
+static unsigned int atmel_ecdh_supported_curve(unsigned int curve_id)
+{
+	if (curve_id == ECC_CURVE_NIST_P256)
+		return ATMEL_ECC_NIST_P256_N_SIZE;
+
+	return 0;
+}
+
+/*
+ * A random private key is generated and stored in the device. The device
+ * returns the pair public key.
+ */
+static int atmel_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
+				 unsigned int len)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct atmel_ecc_cmd *cmd;
+	void *public_key;
+	struct ecdh params;
+	int ret = -ENOMEM;
+
+	/* free the old public key, if any */
+	kfree(ctx->public_key);
+	/* make sure you don't free the old public key twice */
+	ctx->public_key = NULL;
+
+	if (crypto_ecdh_decode_key(buf, len, &params) < 0) {
+		dev_err(&ctx->client->dev, "crypto_ecdh_decode_key failed\n");
+		return -EINVAL;
+	}
+
+	ctx->n_sz = atmel_ecdh_supported_curve(params.curve_id);
+	if (!ctx->n_sz || params.key_size) {
+		/* fallback to ecdh software implementation */
+		ctx->do_fallback = true;
+		return crypto_kpp_set_secret(ctx->fallback, buf, len);
+	}
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	public_key = kmalloc(ATMEL_ECC_PUBKEY_SIZE, GFP_KERNEL);
+	if (!public_key)
+		goto free_cmd;
+
+	ctx->do_fallback = false;
+	ctx->curve_id = params.curve_id;
+
+	atmel_ecc_init_genkey_cmd(cmd, DATA_SLOT_2);
+
+	ret = atmel_ecc_send_receive(ctx->client, cmd);
+	if (ret)
+		goto free_public_key;
+
+	/* save the public key */
+	memcpy(public_key, &cmd->data[RSP_DATA_IDX], ATMEL_ECC_PUBKEY_SIZE);
+	ctx->public_key = public_key;
+
+	kfree(cmd);
+	return 0;
+
+free_public_key:
+	kfree(public_key);
+free_cmd:
+	kfree(cmd);
+	return ret;
+}
+
+static int atmel_ecdh_generate_public_key(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	size_t copied;
+	int ret = 0;
+
+	if (ctx->do_fallback) {
+		kpp_request_set_tfm(req, ctx->fallback);
+		return crypto_kpp_generate_public_key(req);
+	}
+
+	/* public key was saved at private key generation */
+	copied = sg_copy_from_buffer(req->dst, 1, ctx->public_key,
+				     ATMEL_ECC_PUBKEY_SIZE);
+	if (copied != ATMEL_ECC_PUBKEY_SIZE)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct atmel_ecc_work_data *work_data;
+	gfp_t gfp;
+	int ret;
+
+	if (ctx->do_fallback) {
+		kpp_request_set_tfm(req, ctx->fallback);
+		return crypto_kpp_compute_shared_secret(req);
+	}
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
+							     GFP_ATOMIC;
+
+	work_data = kmalloc(sizeof(*work_data), gfp);
+	if (!work_data)
+		return -ENOMEM;
+
+	work_data->ctx = ctx;
+
+	ret = atmel_ecc_init_ecdh_cmd(&work_data->cmd, req->src);
+	if (ret)
+		goto free_work_data;
+
+	atmel_ecc_enqueue(work_data, atmel_ecdh_done, req);
+
+	return -EINPROGRESS;
+
+free_work_data:
+	kfree(work_data);
+	return ret;
+}
+
+struct i2c_client *atmel_ecc_i2c_client_alloc(void)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
+	struct i2c_client *client = ERR_PTR(-ENODEV);
+	int min_tfm_cnt = INT_MAX;
+	int tfm_cnt;
+
+	spin_lock(&driver_data.i2c_list_lock);
+
+	if (list_empty(&driver_data.i2c_client_list)) {
+		spin_unlock(&driver_data.i2c_list_lock);
+		return ERR_PTR(-ENODEV);
+	}
+
+	list_for_each_entry(i2c_priv, &driver_data.i2c_client_list,
+			    i2c_client_list_node) {
+		tfm_cnt = atomic_read(&i2c_priv->tfm_count);
+		if (tfm_cnt < min_tfm_cnt) {
+			min_tfm_cnt = tfm_cnt;
+			min_i2c_priv = i2c_priv;
+		}
+		if (!min_tfm_cnt)
+			break;
+	}
+
+	if (min_i2c_priv) {
+		atomic_inc(&min_i2c_priv->tfm_count);
+		client = min_i2c_priv->client;
+	}
+
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	return client;
+}
+
+void atmel_ecc_i2c_client_free(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	atomic_dec(&i2c_priv->tfm_count);
+}
+
+static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
+{
+	const char *alg = kpp_alg_name(tfm);
+	struct crypto_kpp *fallback;
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	ctx->client = atmel_ecc_i2c_client_alloc();
+	if (IS_ERR(ctx->client)) {
+		pr_err("tfm - i2c_client binding failed\n");
+		return PTR_ERR(ctx->client);
+	}
+
+	fallback = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback)) {
+		dev_err(&ctx->client->dev, "Failed to allocate transformation for '%s': %ld\n",
+			alg, PTR_ERR(fallback));
+		return PTR_ERR(fallback);
+	}
+
+	crypto_kpp_set_flags(fallback, crypto_kpp_get_flags(tfm));
+
+	dev_info(&ctx->client->dev, "Using '%s' as fallback implementation.\n",
+		 crypto_tfm_alg_driver_name(crypto_kpp_tfm(fallback)));
+
+	ctx->fallback = fallback;
+
+	return 0;
+}
+
+static void atmel_ecdh_exit_tfm(struct crypto_kpp *tfm)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	kfree(ctx->public_key);
+	crypto_free_kpp(ctx->fallback);
+	atmel_ecc_i2c_client_free(ctx->client);
+}
+
+static unsigned int atmel_ecdh_max_size(struct crypto_kpp *tfm)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	if (ctx->fallback)
+		return crypto_kpp_maxsize(ctx->fallback);
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	return ATMEL_ECC_PUBKEY_SIZE;
+}
+
+static struct kpp_alg atmel_ecdh = {
+	.set_secret = atmel_ecdh_set_secret,
+	.generate_public_key = atmel_ecdh_generate_public_key,
+	.compute_shared_secret = atmel_ecdh_compute_shared_secret,
+	.init = atmel_ecdh_init_tfm,
+	.exit = atmel_ecdh_exit_tfm,
+	.max_size = atmel_ecdh_max_size,
+	.base = {
+		.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+		.cra_name = "ecdh",
+		.cra_driver_name = "atmel-ecdh",
+		.cra_priority = ATMEL_ECC_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = sizeof(struct atmel_ecdh_ctx),
+	},
+};
+
+static inline size_t atmel_ecc_wake_token_sz(u32 bus_clk_rate)
+{
+	u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
+
+	/* return the size of the wake_token in bytes */
+	return DIV_ROUND_UP(no_of_bits, 8);
+}
+
+static int device_sanity_check(struct i2c_client *client)
+{
+	struct atmel_ecc_cmd *cmd;
+	int ret;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	atmel_ecc_init_read_cmd(cmd);
+
+	ret = atmel_ecc_send_receive(client, cmd);
+	if (ret)
+		goto free_cmd;
+
+	/*
+	 * It is vital that the Configuration, Data and OTP zones be locked
+	 * prior to release into the field of the system containing the device.
+	 * Failure to lock these zones may permit modification of any secret
+	 * keys and may lead to other security problems.
+	 */
+	if (cmd->data[LOCK_CONFIG_IDX] || cmd->data[LOCK_VALUE_IDX]) {
+		dev_err(&client->dev, "Configuration or Data and OTP zones are unlocked!\n");
+		ret = -ENOTSUPP;
+	}
+
+	/* fall through */
+free_cmd:
+	kfree(cmd);
+	return ret;
+}
+
+static int atmel_ecc_probe(struct i2c_client *client,
+			   const struct i2c_device_id *id)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv;
+	struct device *dev = &client->dev;
+	int ret;
+	u32 bus_clk_rate;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(dev, "I2C_FUNC_I2C not supported\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(client->adapter->dev.of_node,
+				   "clock-frequency", &bus_clk_rate);
+	if (ret) {
+		dev_err(dev, "of: failed to read clock-frequency property\n");
+		return ret;
+	}
+
+	if (bus_clk_rate > 1000000L) {
+		dev_err(dev, "%d exceeds maximum supported clock frequency (1MHz)\n",
+			bus_clk_rate);
+		return -EINVAL;
+	}
+
+	i2c_priv = devm_kmalloc(dev, sizeof(*i2c_priv), GFP_KERNEL);
+	if (!i2c_priv)
+		return -ENOMEM;
+
+	i2c_priv->client = client;
+	mutex_init(&i2c_priv->lock);
+
+	/*
+	 * WAKE_TOKEN_MAX_SIZE was calculated for the maximum bus_clk_rate -
+	 * 1MHz. The previous bus_clk_rate check ensures us that wake_token_sz
+	 * will always be smaller than or equal to WAKE_TOKEN_MAX_SIZE.
+	 */
+	i2c_priv->wake_token_sz = atmel_ecc_wake_token_sz(bus_clk_rate);
+
+	memset(i2c_priv->wake_token, 0, sizeof(i2c_priv->wake_token));
+
+	atomic_set(&i2c_priv->tfm_count, 0);
+
+	i2c_set_clientdata(client, i2c_priv);
+
+	ret = device_sanity_check(client);
+	if (ret)
+		return ret;
+
+	spin_lock(&driver_data.i2c_list_lock);
+	list_add_tail(&i2c_priv->i2c_client_list_node,
+		      &driver_data.i2c_client_list);
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	ret = crypto_register_kpp(&atmel_ecdh);
+	if (ret) {
+		spin_lock(&driver_data.i2c_list_lock);
+		list_del(&i2c_priv->i2c_client_list_node);
+		spin_unlock(&driver_data.i2c_list_lock);
+
+		dev_err(dev, "%s alg registration failed\n",
+			atmel_ecdh.base.cra_driver_name);
+	} else {
+		dev_info(dev, "atmel ecc algorithms registered in /proc/crypto\n");
+	}
+
+	return ret;
+}
+
+static int atmel_ecc_remove(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	/* Return EBUSY if i2c client already allocated. */
+	if (atomic_read(&i2c_priv->tfm_count)) {
+		dev_err(&client->dev, "Device is busy\n");
+		return -EBUSY;
+	}
+
+	crypto_unregister_kpp(&atmel_ecdh);
+
+	spin_lock(&driver_data.i2c_list_lock);
+	list_del(&i2c_priv->i2c_client_list_node);
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id atmel_ecc_dt_ids[] = {
+	{
+		.compatible = "atmel,atecc508a",
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_ecc_dt_ids);
+#endif
+
+static const struct i2c_device_id atmel_ecc_id[] = {
+	{ "atecc508a", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, atmel_ecc_id);
+
+static struct i2c_driver atmel_ecc_driver = {
+	.driver = {
+		.name	= "atmel-ecc",
+		.of_match_table = of_match_ptr(atmel_ecc_dt_ids),
+	},
+	.probe		= atmel_ecc_probe,
+	.remove		= atmel_ecc_remove,
+	.id_table	= atmel_ecc_id,
+};
+
+static int __init atmel_ecc_init(void)
+{
+	spin_lock_init(&driver_data.i2c_list_lock);
+	INIT_LIST_HEAD(&driver_data.i2c_client_list);
+	return i2c_add_driver(&atmel_ecc_driver);
+}
+
+static void __exit atmel_ecc_exit(void)
+{
+	flush_scheduled_work();
+	i2c_del_driver(&atmel_ecc_driver);
+}
+
+module_init(atmel_ecc_init);
+module_exit(atmel_ecc_exit);
+
+MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
+MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/atmel-ecc.h b/drivers/crypto/atmel-ecc.h
new file mode 100644
index 0000000000000..25232c8abcc2e
--- /dev/null
+++ b/drivers/crypto/atmel-ecc.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __ATMEL_ECC_H__
+#define __ATMEL_ECC_H__
+
+#define ATMEL_ECC_PRIORITY		300
+
+#define COMMAND				0x03 /* packet function */
+#define SLEEP_TOKEN			0x01
+#define WAKE_TOKEN_MAX_SIZE		8
+
+/* Definitions of Data and Command sizes */
+#define WORD_ADDR_SIZE			1
+#define COUNT_SIZE			1
+#define CRC_SIZE			2
+#define CMD_OVERHEAD_SIZE		(COUNT_SIZE + CRC_SIZE)
+
+/* size in bytes of the n prime */
+#define ATMEL_ECC_NIST_P256_N_SIZE	32
+#define ATMEL_ECC_PUBKEY_SIZE		(2 * ATMEL_ECC_NIST_P256_N_SIZE)
+
+#define STATUS_RSP_SIZE			4
+#define ECDH_RSP_SIZE			(32 + CMD_OVERHEAD_SIZE)
+#define GENKEY_RSP_SIZE			(ATMEL_ECC_PUBKEY_SIZE + \
+					 CMD_OVERHEAD_SIZE)
+#define READ_RSP_SIZE			(4 + CMD_OVERHEAD_SIZE)
+#define MAX_RSP_SIZE			GENKEY_RSP_SIZE
+
+/**
+ * atmel_ecc_cmd - structure used for communicating with the device.
+ * @word_addr: indicates the function of the packet sent to the device. This
+ *             byte should have a value of COMMAND for normal operation.
+ * @count    : number of bytes to be transferred to (or from) the device.
+ * @opcode   : the command code.
+ * @param1   : the first parameter; always present.
+ * @param2   : the second parameter; always present.
+ * @data     : optional remaining input data. Includes a 2-byte CRC.
+ * @rxsize   : size of the data received from i2c client.
+ * @msecs    : command execution time in milliseconds
+ */
+struct atmel_ecc_cmd {
+	u8 word_addr;
+	u8 count;
+	u8 opcode;
+	u8 param1;
+	u16 param2;
+	u8 data[MAX_RSP_SIZE];
+	u8 msecs;
+	u16 rxsize;
+} __packed;
+
+/* Status/Error codes */
+#define STATUS_SIZE			0x04
+#define STATUS_NOERR			0x00
+#define STATUS_WAKE_SUCCESSFUL		0x11
+
+static const struct {
+	u8 value;
+	const char *error_text;
+} error_list[] = {
+	{ 0x01, "CheckMac or Verify miscompare" },
+	{ 0x03, "Parse Error" },
+	{ 0x05, "ECC Fault" },
+	{ 0x0F, "Execution Error" },
+	{ 0xEE, "Watchdog about to expire" },
+	{ 0xFF, "CRC or other communication error" },
+};
+
+/* Definitions for eeprom organization */
+#define CONFIG_ZONE			0
+
+/* Definitions for Indexes common to all commands */
+#define RSP_DATA_IDX			1 /* buffer index of data in response */
+#define DATA_SLOT_2			2 /* used for ECDH private key */
+
+/* Definitions for the device lock state */
+#define DEVICE_LOCK_ADDR		0x15
+#define LOCK_VALUE_IDX			(RSP_DATA_IDX + 2)
+#define LOCK_CONFIG_IDX			(RSP_DATA_IDX + 3)
+
+/*
+ * Wake High delay to data communication (microseconds). SDA should be stable
+ * high for this entire duration.
+ */
+#define TWHI_MIN			1500
+#define TWHI_MAX			1550
+
+/* Wake Low duration */
+#define TWLO_USEC			60
+
+/* Command execution time (milliseconds) */
+#define MAX_EXEC_TIME_ECDH		58
+#define MAX_EXEC_TIME_GENKEY		115
+#define MAX_EXEC_TIME_READ		1
+
+/* Command opcode */
+#define OPCODE_ECDH			0x43
+#define OPCODE_GENKEY			0x40
+#define OPCODE_READ			0x02
+
+/* Definitions for the READ Command */
+#define READ_COUNT			7
+
+/* Definitions for the GenKey Command */
+#define GENKEY_COUNT			7
+#define GENKEY_MODE_PRIVATE		0x04
+
+/* Definitions for the ECDH Command */
+#define ECDH_COUNT			71
+#define ECDH_PREFIX_MODE		0x00
+
+#endif /* __ATMEL_ECC_H__ */

From a0a613abe190c96047c63b9147bd1c9d8e96e74f Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 5 Jul 2017 13:08:00 +0300
Subject: [PATCH 016/116] MAINTAINERS: add a maintainer for Microchip / Atmel
 ECC driver

A new cryptographic engine driver was added in
drivers/crypto/atmel-ecc.*.
Add myself as a maintainer for this driver.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 205d3977ac46e..d5b6c71e783eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8685,6 +8685,12 @@ F:	drivers/dma/at_hdmac.c
 F:	drivers/dma/at_hdmac_regs.h
 F:	include/linux/platform_data/dma-atmel.h
 
+MICROCHIP / ATMEL ECC DRIVER
+M:	Tudor Ambarus <tudor.ambarus@microchip.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/atmel-ecc.*
+
 MICROCHIP / ATMEL ISC DRIVER
 M:	Songjun Wu <songjun.wu@microchip.com>
 L:	linux-media@vger.kernel.org

From 970e8303cb8d6d8e77402345abbdd83862e800ac Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 6 Jul 2017 09:59:13 -0500
Subject: [PATCH 017/116] crypto: ccp - Use devres interface to allocate
 PCI/iomap and cleanup

Update pci and platform files to use devres interface to allocate the PCI
and iomap resources. Also add helper functions to consolicate module init,
exit and power mangagement code duplication.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev-v3.c   |   7 ++
 drivers/crypto/ccp/ccp-dev.c      |  61 ++++++++++++++++
 drivers/crypto/ccp/ccp-dev.h      |   6 ++
 drivers/crypto/ccp/ccp-pci.c      | 114 +++++++-----------------------
 drivers/crypto/ccp/ccp-platform.c |  56 ++-------------
 5 files changed, 106 insertions(+), 138 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 367c2e30656fc..52aa88ba13a55 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -586,6 +586,13 @@ static const struct ccp_actions ccp3_actions = {
 	.irqhandler = ccp_irq_handler,
 };
 
+const struct ccp_vdata ccpv3_platform = {
+	.version = CCP_VERSION(3, 0),
+	.setup = NULL,
+	.perform = &ccp3_actions,
+	.offset = 0,
+};
+
 const struct ccp_vdata ccpv3 = {
 	.version = CCP_VERSION(3, 0),
 	.setup = NULL,
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 67cbb3e76888d..83a0ce5661adb 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -539,8 +539,69 @@ bool ccp_queues_suspended(struct ccp_device *ccp)
 
 	return ccp->cmd_q_count == suspended;
 }
+
+int ccp_dev_suspend(struct ccp_device *ccp, pm_message_t state)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 1;
+
+	/* Wake all the queue kthreads to prepare for suspend */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+int ccp_dev_resume(struct ccp_device *ccp)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 0;
+
+	/* Wake up all the kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].suspended = 0;
+		wake_up_process(ccp->cmd_q[i].kthread);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	return 0;
+}
 #endif
 
+int ccp_dev_init(struct ccp_device *ccp)
+{
+	ccp->io_regs = ccp->io_map + ccp->vdata->offset;
+
+	if (ccp->vdata->setup)
+		ccp->vdata->setup(ccp);
+
+	return ccp->vdata->perform->init(ccp);
+}
+
+void ccp_dev_destroy(struct ccp_device *ccp)
+{
+	if (!ccp)
+		return;
+
+	ccp->vdata->perform->destroy(ccp);
+}
+
 static int __init ccp_mod_init(void)
 {
 #ifdef CONFIG_X86
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index a70154ac74056..df2e76e212ead 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -652,6 +652,11 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp);
 void ccp5_debugfs_setup(struct ccp_device *ccp);
 void ccp5_debugfs_destroy(void);
 
+int ccp_dev_init(struct ccp_device *ccp);
+void ccp_dev_destroy(struct ccp_device *ccp);
+int ccp_dev_suspend(struct ccp_device *ccp, pm_message_t state);
+int ccp_dev_resume(struct ccp_device *ccp);
+
 /* Structure for computation functions that are device-specific */
 struct ccp_actions {
 	int (*aes)(struct ccp_op *);
@@ -679,6 +684,7 @@ struct ccp_vdata {
 	const unsigned int offset;
 };
 
+extern const struct ccp_vdata ccpv3_platform;
 extern const struct ccp_vdata ccpv3;
 extern const struct ccp_vdata ccpv5a;
 extern const struct ccp_vdata ccpv5b;
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index e880d4cf4ada8..490ad0ad22185 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -150,28 +150,13 @@ static void ccp_free_irqs(struct ccp_device *ccp)
 	ccp->irq = 0;
 }
 
-static int ccp_find_mmio_area(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	resource_size_t io_len;
-	unsigned long io_flags;
-
-	io_flags = pci_resource_flags(pdev, ccp->vdata->bar);
-	io_len = pci_resource_len(pdev, ccp->vdata->bar);
-	if ((io_flags & IORESOURCE_MEM) &&
-	    (io_len >= (ccp->vdata->offset + 0x800)))
-		return ccp->vdata->bar;
-
-	return -EIO;
-}
-
 static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct ccp_device *ccp;
 	struct ccp_pci *ccp_pci;
 	struct device *dev = &pdev->dev;
-	unsigned int bar;
+	void __iomem * const *iomap_table;
+	int bar_mask;
 	int ret;
 
 	ret = -ENOMEM;
@@ -193,32 +178,34 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	ccp->get_irq = ccp_get_irqs;
 	ccp->free_irq = ccp_free_irqs;
 
-	ret = pci_request_regions(pdev, "ccp");
+	ret = pcim_enable_device(pdev);
 	if (ret) {
-		dev_err(dev, "pci_request_regions failed (%d)\n", ret);
+		dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
 		goto e_err;
 	}
 
-	ret = pci_enable_device(pdev);
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ccp");
 	if (ret) {
-		dev_err(dev, "pci_enable_device failed (%d)\n", ret);
-		goto e_regions;
+		dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+		goto e_err;
 	}
 
-	pci_set_master(pdev);
-
-	ret = ccp_find_mmio_area(ccp);
-	if (ret < 0)
-		goto e_device;
-	bar = ret;
+	iomap_table = pcim_iomap_table(pdev);
+	if (!iomap_table) {
+		dev_err(dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
 
-	ret = -EIO;
-	ccp->io_map = pci_iomap(pdev, bar, 0);
+	ccp->io_map = iomap_table[ccp->vdata->bar];
 	if (!ccp->io_map) {
-		dev_err(dev, "pci_iomap failed\n");
-		goto e_device;
+		dev_err(dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto e_err;
 	}
-	ccp->io_regs = ccp->io_map + ccp->vdata->offset;
+
+	pci_set_master(pdev);
 
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
 	if (ret) {
@@ -226,32 +213,20 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		if (ret) {
 			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
 				ret);
-			goto e_iomap;
+			goto e_err;
 		}
 	}
 
 	dev_set_drvdata(dev, ccp);
 
-	if (ccp->vdata->setup)
-		ccp->vdata->setup(ccp);
-
-	ret = ccp->vdata->perform->init(ccp);
+	ret = ccp_dev_init(ccp);
 	if (ret)
-		goto e_iomap;
+		goto e_err;
 
 	dev_notice(dev, "enabled\n");
 
 	return 0;
 
-e_iomap:
-	pci_iounmap(pdev, ccp->io_map);
-
-e_device:
-	pci_disable_device(pdev);
-
-e_regions:
-	pci_release_regions(pdev);
-
 e_err:
 	dev_notice(dev, "initialization failed\n");
 	return ret;
@@ -265,13 +240,7 @@ static void ccp_pci_remove(struct pci_dev *pdev)
 	if (!ccp)
 		return;
 
-	ccp->vdata->perform->destroy(ccp);
-
-	pci_iounmap(pdev, ccp->io_map);
-
-	pci_disable_device(pdev);
-
-	pci_release_regions(pdev);
+	ccp_dev_destroy(ccp);
 
 	dev_notice(dev, "disabled\n");
 }
@@ -281,47 +250,16 @@ static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct device *dev = &pdev->dev;
 	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 1;
-
-	/* Wake all the queue kthreads to prepare for suspend */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		wake_up_process(ccp->cmd_q[i].kthread);
 
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	/* Wait for all queue kthreads to say they're done */
-	while (!ccp_queues_suspended(ccp))
-		wait_event_interruptible(ccp->suspend_queue,
-					 ccp_queues_suspended(ccp));
-
-	return 0;
+	return ccp_dev_suspend(ccp, state);
 }
 
 static int ccp_pci_resume(struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 0;
 
-	/* Wake up all the kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		ccp->cmd_q[i].suspended = 0;
-		wake_up_process(ccp->cmd_q[i].kthread);
-	}
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	return 0;
+	return ccp_dev_resume(ccp);
 }
 #endif
 
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index 6020c4ae72fcd..613188c5a6244 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -104,19 +104,6 @@ static void ccp_free_irqs(struct ccp_device *ccp)
 	free_irq(ccp->irq, dev);
 }
 
-static struct resource *ccp_find_mmio_area(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct resource *ior;
-
-	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (ior && (resource_size(ior) >= 0x800))
-		return ior;
-
-	return NULL;
-}
-
 static int ccp_platform_probe(struct platform_device *pdev)
 {
 	struct ccp_device *ccp;
@@ -146,7 +133,7 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	ccp->get_irq = ccp_get_irqs;
 	ccp->free_irq = ccp_free_irqs;
 
-	ior = ccp_find_mmio_area(ccp);
+	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ccp->io_map = devm_ioremap_resource(dev, ior);
 	if (IS_ERR(ccp->io_map)) {
 		ret = PTR_ERR(ccp->io_map);
@@ -174,7 +161,7 @@ static int ccp_platform_probe(struct platform_device *pdev)
 
 	dev_set_drvdata(dev, ccp);
 
-	ret = ccp->vdata->perform->init(ccp);
+	ret = ccp_dev_init(ccp);
 	if (ret)
 		goto e_err;
 
@@ -192,7 +179,7 @@ static int ccp_platform_remove(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct ccp_device *ccp = dev_get_drvdata(dev);
 
-	ccp->vdata->perform->destroy(ccp);
+	ccp_dev_destroy(ccp);
 
 	dev_notice(dev, "disabled\n");
 
@@ -205,47 +192,16 @@ static int ccp_platform_suspend(struct platform_device *pdev,
 {
 	struct device *dev = &pdev->dev;
 	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
 
-	ccp->suspending = 1;
-
-	/* Wake all the queue kthreads to prepare for suspend */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		wake_up_process(ccp->cmd_q[i].kthread);
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	/* Wait for all queue kthreads to say they're done */
-	while (!ccp_queues_suspended(ccp))
-		wait_event_interruptible(ccp->suspend_queue,
-					 ccp_queues_suspended(ccp));
-
-	return 0;
+	return ccp_dev_suspend(ccp, state);
 }
 
 static int ccp_platform_resume(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
 
-	ccp->suspending = 0;
-
-	/* Wake up all the kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		ccp->cmd_q[i].suspended = 0;
-		wake_up_process(ccp->cmd_q[i].kthread);
-	}
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	return 0;
+	return ccp_dev_resume(ccp);
 }
 #endif
 
@@ -260,7 +216,7 @@ MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
 #ifdef CONFIG_OF
 static const struct of_device_id ccp_of_match[] = {
 	{ .compatible = "amd,ccp-seattle-v1a",
-	  .data = (const void *)&ccpv3 },
+	  .data = (const void *)&ccpv3_platform },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, ccp_of_match);

From 720419f01832f7e697cb80480b97b2a1e96045cd Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 6 Jul 2017 09:59:14 -0500
Subject: [PATCH 018/116] crypto: ccp - Introduce the AMD Secure Processor
 device

The CCP device is part of the AMD Secure Processor. In order to expand
the usage of the AMD Secure Processor, create a framework that allows
functional components of the AMD Secure Processor to be initialized and
handled appropriately.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig            |   6 +-
 drivers/crypto/ccp/Kconfig        |  21 ++--
 drivers/crypto/ccp/Makefile       |   4 +-
 drivers/crypto/ccp/ccp-dev-v3.c   |   4 +-
 drivers/crypto/ccp/ccp-dev-v5.c   |   5 +-
 drivers/crypto/ccp/ccp-dev.c      | 106 +++++++----------
 drivers/crypto/ccp/ccp-dev.h      |  21 +---
 drivers/crypto/ccp/ccp-pci.c      |  81 ++++++++-----
 drivers/crypto/ccp/ccp-platform.c |  70 ++++++------
 drivers/crypto/ccp/sp-dev.c       | 182 ++++++++++++++++++++++++++++++
 drivers/crypto/ccp/sp-dev.h       | 120 ++++++++++++++++++++
 include/linux/ccp.h               |   7 +-
 12 files changed, 463 insertions(+), 164 deletions(-)
 create mode 100644 drivers/crypto/ccp/sp-dev.c
 create mode 100644 drivers/crypto/ccp/sp-dev.h

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 49397a9ae67c2..5b5393f1b87ab 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -540,11 +540,11 @@ config CRYPTO_DEV_ATMEL_ECC
 	  will be called atmel-ecc.
 
 config CRYPTO_DEV_CCP
-	bool "Support for AMD Cryptographic Coprocessor"
+	bool "Support for AMD Secure Processor"
 	depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
 	help
-	  The AMD Cryptographic Coprocessor provides hardware offload support
-	  for encryption, hashing and related operations.
+	  The AMD Secure Processor provides support for the Cryptographic Coprocessor
+	  (CCP) and the Platform Security Processor (PSP) devices.
 
 if CRYPTO_DEV_CCP
 	source "drivers/crypto/ccp/Kconfig"
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 2238f77aa2489..15b63fd3d180d 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -1,22 +1,29 @@
 config CRYPTO_DEV_CCP_DD
-	tristate "Cryptographic Coprocessor device driver"
-	depends on CRYPTO_DEV_CCP
+	tristate "Secure Processor device driver"
 	default m
+	help
+	  Provides AMD Secure Processor device driver.
+	  If you choose 'M' here, this module will be called ccp.
+
+config CRYPTO_DEV_SP_CCP
+	bool "Cryptographic Coprocessor device"
+	default y
+	depends on CRYPTO_DEV_CCP_DD
 	select HW_RANDOM
 	select DMA_ENGINE
 	select DMADEVICES
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	help
-	  Provides the interface to use the AMD Cryptographic Coprocessor
-	  which can be used to offload encryption operations such as SHA,
-	  AES and more. If you choose 'M' here, this module will be called
-	  ccp.
+	  Provides the support for AMD Cryptographic Coprocessor (CCP) device
+	  which can be used to offload encryption operations such as SHA, AES
+	  and more.
 
 config CRYPTO_DEV_CCP_CRYPTO
 	tristate "Encryption and hashing offload support"
-	depends on CRYPTO_DEV_CCP_DD
 	default m
+	depends on CRYPTO_DEV_CCP_DD
+	depends on CRYPTO_DEV_SP_CCP
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AUTHENC
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 59493fd3a7514..d2f1b5264362a 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -1,9 +1,9 @@
 obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
-ccp-objs := ccp-dev.o \
+ccp-objs  := sp-dev.o ccp-platform.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
 	    ccp-ops.o \
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
-	    ccp-platform.o \
 	    ccp-dmaengine.o \
 	    ccp-debugfs.o
 ccp-$(CONFIG_PCI) += ccp-pci.o
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 52aa88ba13a55..57179034b6049 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -359,8 +359,7 @@ static void ccp_irq_bh(unsigned long data)
 
 static irqreturn_t ccp_irq_handler(int irq, void *data)
 {
-	struct device *dev = data;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_device *ccp = (struct ccp_device *)data;
 
 	ccp_disable_queue_interrupts(ccp);
 	if (ccp->use_tasklet)
@@ -597,6 +596,5 @@ const struct ccp_vdata ccpv3 = {
 	.version = CCP_VERSION(3, 0),
 	.setup = NULL,
 	.perform = &ccp3_actions,
-	.bar = 2,
 	.offset = 0x20000,
 };
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index b10d2d2075cbd..8ed2b377d05f2 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -769,8 +769,7 @@ static void ccp5_irq_bh(unsigned long data)
 
 static irqreturn_t ccp5_irq_handler(int irq, void *data)
 {
-	struct device *dev = data;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_device *ccp = (struct ccp_device *)data;
 
 	ccp5_disable_queue_interrupts(ccp);
 	ccp->total_interrupts++;
@@ -1113,7 +1112,6 @@ const struct ccp_vdata ccpv5a = {
 	.version = CCP_VERSION(5, 0),
 	.setup = ccp5_config,
 	.perform = &ccp5_actions,
-	.bar = 2,
 	.offset = 0x0,
 };
 
@@ -1122,6 +1120,5 @@ const struct ccp_vdata ccpv5b = {
 	.dma_chan_attr = DMA_PRIVATE,
 	.setup = ccp5other_config,
 	.perform = &ccp5_actions,
-	.bar = 2,
 	.offset = 0x0,
 };
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 83a0ce5661adb..6b4315f8dad95 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -111,13 +111,6 @@ static LIST_HEAD(ccp_units);
 static DEFINE_SPINLOCK(ccp_rr_lock);
 static struct ccp_device *ccp_rr;
 
-/* Ever-increasing value to produce unique unit numbers */
-static atomic_t ccp_unit_ordinal;
-static unsigned int ccp_increment_unit_ordinal(void)
-{
-	return atomic_inc_return(&ccp_unit_ordinal);
-}
-
 /**
  * ccp_add_device - add a CCP device to the list
  *
@@ -465,14 +458,17 @@ int ccp_cmd_queue_thread(void *data)
  *
  * @dev: device struct of the CCP
  */
-struct ccp_device *ccp_alloc_struct(struct device *dev)
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp)
 {
+	struct device *dev = sp->dev;
 	struct ccp_device *ccp;
 
 	ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
 	if (!ccp)
 		return NULL;
 	ccp->dev = dev;
+	ccp->sp = sp;
+	ccp->axcache = sp->axcache;
 
 	INIT_LIST_HEAD(&ccp->cmd);
 	INIT_LIST_HEAD(&ccp->backlog);
@@ -487,9 +483,8 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 	init_waitqueue_head(&ccp->sb_queue);
 	init_waitqueue_head(&ccp->suspend_queue);
 
-	ccp->ord = ccp_increment_unit_ordinal();
-	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
-	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord);
+	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", sp->ord);
+	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", sp->ord);
 
 	return ccp;
 }
@@ -540,8 +535,9 @@ bool ccp_queues_suspended(struct ccp_device *ccp)
 	return ccp->cmd_q_count == suspended;
 }
 
-int ccp_dev_suspend(struct ccp_device *ccp, pm_message_t state)
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
 {
+	struct ccp_device *ccp = sp->ccp_data;
 	unsigned long flags;
 	unsigned int i;
 
@@ -563,8 +559,9 @@ int ccp_dev_suspend(struct ccp_device *ccp, pm_message_t state)
 	return 0;
 }
 
-int ccp_dev_resume(struct ccp_device *ccp)
+int ccp_dev_resume(struct sp_device *sp)
 {
+	struct ccp_device *ccp = sp->ccp_data;
 	unsigned long flags;
 	unsigned int i;
 
@@ -584,71 +581,54 @@ int ccp_dev_resume(struct ccp_device *ccp)
 }
 #endif
 
-int ccp_dev_init(struct ccp_device *ccp)
+int ccp_dev_init(struct sp_device *sp)
 {
-	ccp->io_regs = ccp->io_map + ccp->vdata->offset;
-
-	if (ccp->vdata->setup)
-		ccp->vdata->setup(ccp);
-
-	return ccp->vdata->perform->init(ccp);
-}
+	struct device *dev = sp->dev;
+	struct ccp_device *ccp;
+	int ret;
 
-void ccp_dev_destroy(struct ccp_device *ccp)
-{
+	ret = -ENOMEM;
+	ccp = ccp_alloc_struct(sp);
 	if (!ccp)
-		return;
+		goto e_err;
+	sp->ccp_data = ccp;
+
+	ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata;
+	if (!ccp->vdata || !ccp->vdata->version) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
 
-	ccp->vdata->perform->destroy(ccp);
-}
+	ccp->get_irq = sp->get_irq;
+	ccp->free_irq = sp->free_irq;
 
-static int __init ccp_mod_init(void)
-{
-#ifdef CONFIG_X86
-	int ret;
+	ccp->io_regs = sp->io_map + ccp->vdata->offset;
+	if (ccp->vdata->setup)
+		ccp->vdata->setup(ccp);
 
-	ret = ccp_pci_init();
+	ret = ccp->vdata->perform->init(ccp);
 	if (ret)
-		return ret;
+		goto e_err;
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		ccp_pci_exit();
-		return -ENODEV;
-	}
+	dev_notice(dev, "ccp enabled\n");
 
 	return 0;
-#endif
-
-#ifdef CONFIG_ARM64
-	int ret;
 
-	ret = ccp_platform_init();
-	if (ret)
-		return ret;
+e_err:
+	sp->ccp_data = NULL;
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		ccp_platform_exit();
-		return -ENODEV;
-	}
-
-	return 0;
-#endif
+	dev_notice(dev, "ccp initialization failed\n");
 
-	return -ENODEV;
+	return ret;
 }
 
-static void __exit ccp_mod_exit(void)
+void ccp_dev_destroy(struct sp_device *sp)
 {
-#ifdef CONFIG_X86
-	ccp_pci_exit();
-#endif
+	struct ccp_device *ccp = sp->ccp_data;
 
-#ifdef CONFIG_ARM64
-	ccp_platform_exit();
-#endif
-}
+	if (!ccp)
+		return;
 
-module_init(ccp_mod_init);
-module_exit(ccp_mod_exit);
+	ccp->vdata->perform->destroy(ccp);
+}
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index df2e76e212ead..ca44821a376d5 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -27,6 +27,8 @@
 #include <linux/irqreturn.h>
 #include <linux/dmaengine.h>
 
+#include "sp-dev.h"
+
 #define MAX_CCP_NAME_LEN		16
 #define MAX_DMAPOOL_NAME_LEN		32
 
@@ -344,6 +346,7 @@ struct ccp_device {
 	char rngname[MAX_CCP_NAME_LEN];
 
 	struct device *dev;
+	struct sp_device *sp;
 
 	/* Bus specific device information
 	 */
@@ -362,7 +365,6 @@ struct ccp_device {
 	 *   them.
 	 */
 	struct mutex req_mutex ____cacheline_aligned;
-	void __iomem *io_map;
 	void __iomem *io_regs;
 
 	/* Master lists that all cmds are queued on. Because there can be
@@ -637,7 +639,7 @@ void ccp_del_device(struct ccp_device *ccp);
 
 extern void ccp_log_error(struct ccp_device *, int);
 
-struct ccp_device *ccp_alloc_struct(struct device *dev);
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 int ccp_cmd_queue_thread(void *data);
 int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
@@ -652,11 +654,6 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp);
 void ccp5_debugfs_setup(struct ccp_device *ccp);
 void ccp5_debugfs_destroy(void);
 
-int ccp_dev_init(struct ccp_device *ccp);
-void ccp_dev_destroy(struct ccp_device *ccp);
-int ccp_dev_suspend(struct ccp_device *ccp, pm_message_t state);
-int ccp_dev_resume(struct ccp_device *ccp);
-
 /* Structure for computation functions that are device-specific */
 struct ccp_actions {
 	int (*aes)(struct ccp_op *);
@@ -674,16 +671,6 @@ struct ccp_actions {
 	irqreturn_t (*irqhandler)(int, void *);
 };
 
-/* Structure to hold CCP version-specific values */
-struct ccp_vdata {
-	const unsigned int version;
-	const unsigned int dma_chan_attr;
-	void (*setup)(struct ccp_device *);
-	const struct ccp_actions *perform;
-	const unsigned int bar;
-	const unsigned int offset;
-};
-
 extern const struct ccp_vdata ccpv3_platform;
 extern const struct ccp_vdata ccpv3;
 extern const struct ccp_vdata ccpv5a;
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index 490ad0ad22185..ab2df96c4b9df 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -40,7 +40,8 @@ struct ccp_pci {
 
 static int ccp_get_msix_irqs(struct ccp_device *ccp)
 {
-	struct ccp_pci *ccp_pci = ccp->dev_specific;
+	struct sp_device *sp = ccp->sp;
+	struct ccp_pci *ccp_pci = sp->dev_specific;
 	struct device *dev = ccp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct msix_entry msix_entry[MSIX_VECTORS];
@@ -58,11 +59,11 @@ static int ccp_get_msix_irqs(struct ccp_device *ccp)
 	for (v = 0; v < ccp_pci->msix_count; v++) {
 		/* Set the interrupt names and request the irqs */
 		snprintf(ccp_pci->msix[v].name, name_len, "%s-%u",
-			 ccp->name, v);
+			 sp->name, v);
 		ccp_pci->msix[v].vector = msix_entry[v].vector;
 		ret = request_irq(ccp_pci->msix[v].vector,
 				  ccp->vdata->perform->irqhandler,
-				  0, ccp_pci->msix[v].name, dev);
+				  0, ccp_pci->msix[v].name, ccp);
 		if (ret) {
 			dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
 				   ret);
@@ -86,6 +87,7 @@ static int ccp_get_msix_irqs(struct ccp_device *ccp)
 
 static int ccp_get_msi_irq(struct ccp_device *ccp)
 {
+	struct sp_device *sp = ccp->sp;
 	struct device *dev = ccp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int ret;
@@ -96,7 +98,7 @@ static int ccp_get_msi_irq(struct ccp_device *ccp)
 
 	ccp->irq = pdev->irq;
 	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, dev);
+			  sp->name, ccp);
 	if (ret) {
 		dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
 		goto e_msi;
@@ -134,17 +136,18 @@ static int ccp_get_irqs(struct ccp_device *ccp)
 
 static void ccp_free_irqs(struct ccp_device *ccp)
 {
-	struct ccp_pci *ccp_pci = ccp->dev_specific;
+	struct sp_device *sp = ccp->sp;
+	struct ccp_pci *ccp_pci = sp->dev_specific;
 	struct device *dev = ccp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 
 	if (ccp_pci->msix_count) {
 		while (ccp_pci->msix_count--)
 			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
-				 dev);
+				 ccp);
 		pci_disable_msix(pdev);
 	} else if (ccp->irq) {
-		free_irq(ccp->irq, dev);
+		free_irq(ccp->irq, ccp);
 		pci_disable_msi(pdev);
 	}
 	ccp->irq = 0;
@@ -152,7 +155,7 @@ static void ccp_free_irqs(struct ccp_device *ccp)
 
 static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	struct ccp_device *ccp;
+	struct sp_device *sp;
 	struct ccp_pci *ccp_pci;
 	struct device *dev = &pdev->dev;
 	void __iomem * const *iomap_table;
@@ -160,23 +163,23 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int ret;
 
 	ret = -ENOMEM;
-	ccp = ccp_alloc_struct(dev);
-	if (!ccp)
+	sp = sp_alloc_struct(dev);
+	if (!sp)
 		goto e_err;
 
 	ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
 	if (!ccp_pci)
 		goto e_err;
 
-	ccp->dev_specific = ccp_pci;
-	ccp->vdata = (struct ccp_vdata *)id->driver_data;
-	if (!ccp->vdata || !ccp->vdata->version) {
+	sp->dev_specific = ccp_pci;
+	sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data;
+	if (!sp->dev_vdata) {
 		ret = -ENODEV;
 		dev_err(dev, "missing driver data\n");
 		goto e_err;
 	}
-	ccp->get_irq = ccp_get_irqs;
-	ccp->free_irq = ccp_free_irqs;
+	sp->get_irq = ccp_get_irqs;
+	sp->free_irq = ccp_free_irqs;
 
 	ret = pcim_enable_device(pdev);
 	if (ret) {
@@ -198,8 +201,8 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto e_err;
 	}
 
-	ccp->io_map = iomap_table[ccp->vdata->bar];
-	if (!ccp->io_map) {
+	sp->io_map = iomap_table[sp->dev_vdata->bar];
+	if (!sp->io_map) {
 		dev_err(dev, "ioremap failed\n");
 		ret = -ENOMEM;
 		goto e_err;
@@ -217,9 +220,9 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		}
 	}
 
-	dev_set_drvdata(dev, ccp);
+	dev_set_drvdata(dev, sp);
 
-	ret = ccp_dev_init(ccp);
+	ret = sp_init(sp);
 	if (ret)
 		goto e_err;
 
@@ -235,12 +238,12 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 static void ccp_pci_remove(struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct sp_device *sp = dev_get_drvdata(dev);
 
-	if (!ccp)
+	if (!sp)
 		return;
 
-	ccp_dev_destroy(ccp);
+	sp_destroy(sp);
 
 	dev_notice(dev, "disabled\n");
 }
@@ -249,24 +252,44 @@ static void ccp_pci_remove(struct pci_dev *pdev)
 static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct sp_device *sp = dev_get_drvdata(dev);
 
-	return ccp_dev_suspend(ccp, state);
+	return sp_suspend(sp, state);
 }
 
 static int ccp_pci_resume(struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct sp_device *sp = dev_get_drvdata(dev);
 
-	return ccp_dev_resume(ccp);
+	return sp_resume(sp);
 }
 #endif
 
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3,
+#endif
+	},
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5a,
+#endif
+	},
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5b,
+#endif
+	},
+};
 static const struct pci_device_id ccp_pci_table[] = {
-	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
-	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a },
-	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b },
+	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
+	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] },
+	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] },
 	/* Last entry must be zero */
 	{ 0, }
 };
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index 613188c5a6244..419e00ccf5329 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -35,26 +35,26 @@ struct ccp_platform {
 static const struct acpi_device_id ccp_acpi_match[];
 static const struct of_device_id ccp_of_match[];
 
-static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev)
+static struct sp_dev_vdata *ccp_get_of_version(struct platform_device *pdev)
 {
 #ifdef CONFIG_OF
 	const struct of_device_id *match;
 
 	match = of_match_node(ccp_of_match, pdev->dev.of_node);
 	if (match && match->data)
-		return (struct ccp_vdata *)match->data;
+		return (struct sp_dev_vdata *)match->data;
 #endif
 	return NULL;
 }
 
-static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
+static struct sp_dev_vdata *ccp_get_acpi_version(struct platform_device *pdev)
 {
 #ifdef CONFIG_ACPI
 	const struct acpi_device_id *match;
 
 	match = acpi_match_device(ccp_acpi_match, &pdev->dev);
 	if (match && match->driver_data)
-		return (struct ccp_vdata *)match->driver_data;
+		return (struct sp_dev_vdata *)match->driver_data;
 #endif
 	return NULL;
 }
@@ -73,7 +73,7 @@ static int ccp_get_irq(struct ccp_device *ccp)
 
 	ccp->irq = ret;
 	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, dev);
+			  ccp->name, ccp);
 	if (ret) {
 		dev_notice(dev, "unable to allocate IRQ (%d)\n", ret);
 		return ret;
@@ -99,14 +99,12 @@ static int ccp_get_irqs(struct ccp_device *ccp)
 
 static void ccp_free_irqs(struct ccp_device *ccp)
 {
-	struct device *dev = ccp->dev;
-
-	free_irq(ccp->irq, dev);
+	free_irq(ccp->irq, ccp);
 }
 
 static int ccp_platform_probe(struct platform_device *pdev)
 {
-	struct ccp_device *ccp;
+	struct sp_device *sp;
 	struct ccp_platform *ccp_platform;
 	struct device *dev = &pdev->dev;
 	enum dev_dma_attr attr;
@@ -114,32 +112,31 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	int ret;
 
 	ret = -ENOMEM;
-	ccp = ccp_alloc_struct(dev);
-	if (!ccp)
+	sp = sp_alloc_struct(dev);
+	if (!sp)
 		goto e_err;
 
 	ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
 	if (!ccp_platform)
 		goto e_err;
 
-	ccp->dev_specific = ccp_platform;
-	ccp->vdata = pdev->dev.of_node ? ccp_get_of_version(pdev)
+	sp->dev_specific = ccp_platform;
+	sp->dev_vdata = pdev->dev.of_node ? ccp_get_of_version(pdev)
 					 : ccp_get_acpi_version(pdev);
-	if (!ccp->vdata || !ccp->vdata->version) {
+	if (!sp->dev_vdata) {
 		ret = -ENODEV;
 		dev_err(dev, "missing driver data\n");
 		goto e_err;
 	}
-	ccp->get_irq = ccp_get_irqs;
-	ccp->free_irq = ccp_free_irqs;
+	sp->get_irq = ccp_get_irqs;
+	sp->free_irq = ccp_free_irqs;
 
 	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ccp->io_map = devm_ioremap_resource(dev, ior);
-	if (IS_ERR(ccp->io_map)) {
-		ret = PTR_ERR(ccp->io_map);
+	sp->io_map = devm_ioremap_resource(dev, ior);
+	if (IS_ERR(sp->io_map)) {
+		ret = PTR_ERR(sp->io_map);
 		goto e_err;
 	}
-	ccp->io_regs = ccp->io_map;
 
 	attr = device_get_dma_attr(dev);
 	if (attr == DEV_DMA_NOT_SUPPORTED) {
@@ -149,9 +146,9 @@ static int ccp_platform_probe(struct platform_device *pdev)
 
 	ccp_platform->coherent = (attr == DEV_DMA_COHERENT);
 	if (ccp_platform->coherent)
-		ccp->axcache = CACHE_WB_NO_ALLOC;
+		sp->axcache = CACHE_WB_NO_ALLOC;
 	else
-		ccp->axcache = CACHE_NONE;
+		sp->axcache = CACHE_NONE;
 
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
 	if (ret) {
@@ -159,9 +156,9 @@ static int ccp_platform_probe(struct platform_device *pdev)
 		goto e_err;
 	}
 
-	dev_set_drvdata(dev, ccp);
+	dev_set_drvdata(dev, sp);
 
-	ret = ccp_dev_init(ccp);
+	ret = sp_init(sp);
 	if (ret)
 		goto e_err;
 
@@ -177,9 +174,9 @@ static int ccp_platform_probe(struct platform_device *pdev)
 static int ccp_platform_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct sp_device *sp = dev_get_drvdata(dev);
 
-	ccp_dev_destroy(ccp);
+	sp_destroy(sp);
 
 	dev_notice(dev, "disabled\n");
 
@@ -191,23 +188,32 @@ static int ccp_platform_suspend(struct platform_device *pdev,
 				pm_message_t state)
 {
 	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct sp_device *sp = dev_get_drvdata(dev);
 
-	return ccp_dev_suspend(ccp, state);
+	return sp_suspend(sp, state);
 }
 
 static int ccp_platform_resume(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct sp_device *sp = dev_get_drvdata(dev);
 
-	return ccp_dev_resume(ccp);
+	return sp_resume(sp);
 }
 #endif
 
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 0,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3_platform,
+#endif
+	},
+};
+
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id ccp_acpi_match[] = {
-	{ "AMDI0C00", (kernel_ulong_t)&ccpv3 },
+	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
@@ -216,7 +222,7 @@ MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
 #ifdef CONFIG_OF
 static const struct of_device_id ccp_of_match[] = {
 	{ .compatible = "amd,ccp-seattle-v1a",
-	  .data = (const void *)&ccpv3_platform },
+	  .data = (const void *)&dev_vdata[0] },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, ccp_of_match);
diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
new file mode 100644
index 0000000000000..63cc74ee6d2a0
--- /dev/null
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -0,0 +1,182 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+#include "sp-dev.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.1.0");
+MODULE_DESCRIPTION("AMD Secure Processor driver");
+
+/* List of SPs, SP count, read-write access lock, and access functions
+ *
+ * Lock structure: get sp_unit_lock for reading whenever we need to
+ * examine the SP list.
+ */
+static DEFINE_RWLOCK(sp_unit_lock);
+static LIST_HEAD(sp_units);
+
+/* Ever-increasing value to produce unique unit numbers */
+static atomic_t sp_ordinal;
+
+static void sp_add_device(struct sp_device *sp)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&sp_unit_lock, flags);
+
+	list_add_tail(&sp->entry, &sp_units);
+
+	write_unlock_irqrestore(&sp_unit_lock, flags);
+}
+
+static void sp_del_device(struct sp_device *sp)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&sp_unit_lock, flags);
+
+	list_del(&sp->entry);
+
+	write_unlock_irqrestore(&sp_unit_lock, flags);
+}
+
+/**
+ * sp_alloc_struct - allocate and initialize the sp_device struct
+ *
+ * @dev: device struct of the SP
+ */
+struct sp_device *sp_alloc_struct(struct device *dev)
+{
+	struct sp_device *sp;
+
+	sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL);
+	if (!sp)
+		return NULL;
+
+	sp->dev = dev;
+	sp->ord = atomic_inc_return(&sp_ordinal);
+	snprintf(sp->name, SP_MAX_NAME_LEN, "sp-%u", sp->ord);
+
+	return sp;
+}
+
+int sp_init(struct sp_device *sp)
+{
+	sp_add_device(sp);
+
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_init(sp);
+
+	return 0;
+}
+
+void sp_destroy(struct sp_device *sp)
+{
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_destroy(sp);
+
+	sp_del_device(sp);
+}
+
+#ifdef CONFIG_PM
+int sp_suspend(struct sp_device *sp, pm_message_t state)
+{
+	int ret;
+
+	if (sp->dev_vdata->ccp_vdata) {
+		ret = ccp_dev_suspend(sp, state);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int sp_resume(struct sp_device *sp)
+{
+	int ret;
+
+	if (sp->dev_vdata->ccp_vdata) {
+		ret = ccp_dev_resume(sp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static int __init sp_mod_init(void)
+{
+#ifdef CONFIG_X86
+	int ret;
+
+	ret = ccp_pci_init();
+	if (ret)
+		return ret;
+
+	/* Don't leave the driver loaded if init failed */
+	if (ccp_present() != 0) {
+		ccp_pci_exit();
+		return -ENODEV;
+	}
+
+	return 0;
+#endif
+
+#ifdef CONFIG_ARM64
+	int ret;
+
+	ret = ccp_platform_init();
+	if (ret)
+		return ret;
+
+	/* Don't leave the driver loaded if init failed */
+	if (ccp_present() != 0) {
+		ccp_platform_exit();
+		return -ENODEV;
+	}
+
+	return 0;
+#endif
+
+	return -ENODEV;
+}
+
+static void __exit sp_mod_exit(void)
+{
+#ifdef CONFIG_X86
+	ccp_pci_exit();
+#endif
+
+#ifdef CONFIG_ARM64
+	ccp_platform_exit();
+#endif
+}
+
+module_init(sp_mod_init);
+module_exit(sp_mod_exit);
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
new file mode 100644
index 0000000000000..189e57eeb4fab
--- /dev/null
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -0,0 +1,120 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __SP_DEV_H__
+#define __SP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+
+#define SP_MAX_NAME_LEN		32
+
+#define CACHE_NONE			0x00
+#define CACHE_WB_NO_ALLOC		0xb7
+
+/* Structure to hold CCP device data */
+struct ccp_device;
+struct ccp_vdata {
+	const unsigned int version;
+	const unsigned int dma_chan_attr;
+	void (*setup)(struct ccp_device *);
+	const struct ccp_actions *perform;
+	const unsigned int offset;
+};
+/* Structure to hold SP device data */
+struct sp_dev_vdata {
+	const unsigned int bar;
+
+	const struct ccp_vdata *ccp_vdata;
+	void *psp_vdata;
+};
+
+struct sp_device {
+	struct list_head entry;
+
+	struct device *dev;
+
+	struct sp_dev_vdata *dev_vdata;
+	unsigned int ord;
+	char name[SP_MAX_NAME_LEN];
+
+	/* Bus specific device information */
+	void *dev_specific;
+
+	/* I/O area used for device communication. */
+	void __iomem *io_map;
+
+	/* DMA caching attribute support */
+	unsigned int axcache;
+
+	bool irq_registered;
+
+	int (*get_irq)(struct ccp_device *ccp);
+	void (*free_irq)(struct ccp_device *ccp);
+
+	void *ccp_data;
+	void *psp_data;
+};
+
+int sp_pci_init(void);
+void sp_pci_exit(void);
+
+int sp_platform_init(void);
+void sp_platform_exit(void);
+
+struct sp_device *sp_alloc_struct(struct device *dev);
+
+int sp_init(struct sp_device *sp);
+void sp_destroy(struct sp_device *sp);
+struct sp_device *sp_get_master(void);
+
+int sp_suspend(struct sp_device *sp, pm_message_t state);
+int sp_resume(struct sp_device *sp);
+
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+
+int ccp_dev_init(struct sp_device *sp);
+void ccp_dev_destroy(struct sp_device *sp);
+
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state);
+int ccp_dev_resume(struct sp_device *sp);
+
+#else	/* !CONFIG_CRYPTO_DEV_SP_CCP */
+
+static inline int ccp_dev_init(struct sp_device *sp)
+{
+	return 0;
+}
+static inline void ccp_dev_destroy(struct sp_device *sp) { }
+
+static inline int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+{
+	return 0;
+}
+static inline int ccp_dev_resume(struct sp_device *sp)
+{
+	return 0;
+}
+#endif	/* CONFIG_CRYPTO_DEV_SP_CCP */
+
+#endif
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index c03ee844a99d7..3fd9a6dac3441 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -23,8 +23,7 @@
 struct ccp_device;
 struct ccp_cmd;
 
-#if defined(CONFIG_CRYPTO_DEV_CCP_DD) || \
-	defined(CONFIG_CRYPTO_DEV_CCP_DD_MODULE)
+#if defined(CONFIG_CRYPTO_DEV_SP_CCP)
 
 /**
  * ccp_present - check if a CCP device is present
@@ -70,7 +69,7 @@ unsigned int ccp_version(void);
  */
 int ccp_enqueue_cmd(struct ccp_cmd *cmd);
 
-#else /* CONFIG_CRYPTO_DEV_CCP_DD is not enabled */
+#else /* CONFIG_CRYPTO_DEV_CCP_SP_DEV is not enabled */
 
 static inline int ccp_present(void)
 {
@@ -87,7 +86,7 @@ static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd)
 	return -ENODEV;
 }
 
-#endif /* CONFIG_CRYPTO_DEV_CCP_DD */
+#endif /* CONFIG_CRYPTO_DEV_SP_CCP */
 
 
 /***** AES engine *****/

From f4d18d656f882a7ca558313d5f1b18b1fd01f759 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 6 Jul 2017 09:59:15 -0500
Subject: [PATCH 019/116] crypto: ccp - Abstract interrupt registeration

The CCP and PSP devices part of AMD Secure Procesor may share the same
interrupt. Hence we expand the SP device to register a common interrupt
handler and provide functions to CCP and PSP devices to register their
interrupt callback which will be invoked upon interrupt.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev-v3.c   |   6 +-
 drivers/crypto/ccp/ccp-dev-v5.c   |   7 +-
 drivers/crypto/ccp/ccp-dev.c      |   3 +-
 drivers/crypto/ccp/ccp-dev.h      |   2 -
 drivers/crypto/ccp/ccp-pci.c      | 103 +++++++++-------------------
 drivers/crypto/ccp/ccp-platform.c |  59 ++++++++--------
 drivers/crypto/ccp/sp-dev.c       | 107 ++++++++++++++++++++++++++++++
 drivers/crypto/ccp/sp-dev.h       |  16 ++++-
 8 files changed, 188 insertions(+), 115 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 57179034b6049..695fde887c118 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -453,7 +453,7 @@ static int ccp_init(struct ccp_device *ccp)
 	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
 
 	/* Request an irq */
-	ret = ccp->get_irq(ccp);
+	ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp);
 	if (ret) {
 		dev_err(dev, "unable to allocate an IRQ\n");
 		goto e_pool;
@@ -510,7 +510,7 @@ static int ccp_init(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 e_pool:
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -549,7 +549,7 @@ static void ccp_destroy(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 	for (i = 0; i < ccp->cmd_q_count; i++)
 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 8ed2b377d05f2..b0391f03b0fe0 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -880,7 +880,7 @@ static int ccp5_init(struct ccp_device *ccp)
 
 	dev_dbg(dev, "Requesting an IRQ...\n");
 	/* Request an irq */
-	ret = ccp->get_irq(ccp);
+	ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp);
 	if (ret) {
 		dev_err(dev, "unable to allocate an IRQ\n");
 		goto e_pool;
@@ -986,7 +986,7 @@ static int ccp5_init(struct ccp_device *ccp)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
 e_irq:
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 e_pool:
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -1036,7 +1036,7 @@ static void ccp5_destroy(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		cmd_q = &ccp->cmd_q[i];
@@ -1105,7 +1105,6 @@ static const struct ccp_actions ccp5_actions = {
 	.init = ccp5_init,
 	.destroy = ccp5_destroy,
 	.get_free_slots = ccp5_get_free_slots,
-	.irqhandler = ccp5_irq_handler,
 };
 
 const struct ccp_vdata ccpv5a = {
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 6b4315f8dad95..b2d176164402e 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -600,8 +600,7 @@ int ccp_dev_init(struct sp_device *sp)
 		goto e_err;
 	}
 
-	ccp->get_irq = sp->get_irq;
-	ccp->free_irq = sp->free_irq;
+	ccp->use_tasklet = sp->use_tasklet;
 
 	ccp->io_regs = sp->io_map + ccp->vdata->offset;
 	if (ccp->vdata->setup)
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index ca44821a376d5..193f309736944 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -351,8 +351,6 @@ struct ccp_device {
 	/* Bus specific device information
 	 */
 	void *dev_specific;
-	int (*get_irq)(struct ccp_device *ccp);
-	void (*free_irq)(struct ccp_device *ccp);
 	unsigned int qim;
 	unsigned int irq;
 	bool use_tasklet;
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index ab2df96c4b9df..b29a093fd1272 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -28,67 +28,37 @@
 
 #define MSIX_VECTORS			2
 
-struct ccp_msix {
-	u32 vector;
-	char name[16];
-};
-
 struct ccp_pci {
 	int msix_count;
-	struct ccp_msix msix[MSIX_VECTORS];
+	struct msix_entry msix_entry[MSIX_VECTORS];
 };
 
-static int ccp_get_msix_irqs(struct ccp_device *ccp)
+static int ccp_get_msix_irqs(struct sp_device *sp)
 {
-	struct sp_device *sp = ccp->sp;
 	struct ccp_pci *ccp_pci = sp->dev_specific;
-	struct device *dev = ccp->dev;
+	struct device *dev = sp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct msix_entry msix_entry[MSIX_VECTORS];
-	unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
 	int v, ret;
 
-	for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
-		msix_entry[v].entry = v;
+	for (v = 0; v < ARRAY_SIZE(ccp_pci->msix_entry); v++)
+		ccp_pci->msix_entry[v].entry = v;
 
-	ret = pci_enable_msix_range(pdev, msix_entry, 1, v);
+	ret = pci_enable_msix_range(pdev, ccp_pci->msix_entry, 1, v);
 	if (ret < 0)
 		return ret;
 
 	ccp_pci->msix_count = ret;
-	for (v = 0; v < ccp_pci->msix_count; v++) {
-		/* Set the interrupt names and request the irqs */
-		snprintf(ccp_pci->msix[v].name, name_len, "%s-%u",
-			 sp->name, v);
-		ccp_pci->msix[v].vector = msix_entry[v].vector;
-		ret = request_irq(ccp_pci->msix[v].vector,
-				  ccp->vdata->perform->irqhandler,
-				  0, ccp_pci->msix[v].name, ccp);
-		if (ret) {
-			dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
-				   ret);
-			goto e_irq;
-		}
-	}
-	ccp->use_tasklet = true;
+	sp->use_tasklet = true;
 
+	sp->psp_irq = ccp_pci->msix_entry[0].vector;
+	sp->ccp_irq = (ccp_pci->msix_count > 1) ? ccp_pci->msix_entry[1].vector
+					       : ccp_pci->msix_entry[0].vector;
 	return 0;
-
-e_irq:
-	while (v--)
-		free_irq(ccp_pci->msix[v].vector, dev);
-
-	pci_disable_msix(pdev);
-
-	ccp_pci->msix_count = 0;
-
-	return ret;
 }
 
-static int ccp_get_msi_irq(struct ccp_device *ccp)
+static int ccp_get_msi_irq(struct sp_device *sp)
 {
-	struct sp_device *sp = ccp->sp;
-	struct device *dev = ccp->dev;
+	struct device *dev = sp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int ret;
 
@@ -96,35 +66,24 @@ static int ccp_get_msi_irq(struct ccp_device *ccp)
 	if (ret)
 		return ret;
 
-	ccp->irq = pdev->irq;
-	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  sp->name, ccp);
-	if (ret) {
-		dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
-		goto e_msi;
-	}
-	ccp->use_tasklet = true;
+	sp->ccp_irq = pdev->irq;
+	sp->psp_irq = pdev->irq;
 
 	return 0;
-
-e_msi:
-	pci_disable_msi(pdev);
-
-	return ret;
 }
 
-static int ccp_get_irqs(struct ccp_device *ccp)
+static int ccp_get_irqs(struct sp_device *sp)
 {
-	struct device *dev = ccp->dev;
+	struct device *dev = sp->dev;
 	int ret;
 
-	ret = ccp_get_msix_irqs(ccp);
+	ret = ccp_get_msix_irqs(sp);
 	if (!ret)
 		return 0;
 
 	/* Couldn't get MSI-X vectors, try MSI */
 	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
-	ret = ccp_get_msi_irq(ccp);
+	ret = ccp_get_msi_irq(sp);
 	if (!ret)
 		return 0;
 
@@ -134,23 +93,19 @@ static int ccp_get_irqs(struct ccp_device *ccp)
 	return ret;
 }
 
-static void ccp_free_irqs(struct ccp_device *ccp)
+static void ccp_free_irqs(struct sp_device *sp)
 {
-	struct sp_device *sp = ccp->sp;
 	struct ccp_pci *ccp_pci = sp->dev_specific;
-	struct device *dev = ccp->dev;
+	struct device *dev = sp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 
-	if (ccp_pci->msix_count) {
-		while (ccp_pci->msix_count--)
-			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
-				 ccp);
+	if (ccp_pci->msix_count)
 		pci_disable_msix(pdev);
-	} else if (ccp->irq) {
-		free_irq(ccp->irq, ccp);
+	else if (sp->psp_irq)
 		pci_disable_msi(pdev);
-	}
-	ccp->irq = 0;
+
+	sp->ccp_irq = 0;
+	sp->psp_irq = 0;
 }
 
 static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -178,8 +133,6 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		dev_err(dev, "missing driver data\n");
 		goto e_err;
 	}
-	sp->get_irq = ccp_get_irqs;
-	sp->free_irq = ccp_free_irqs;
 
 	ret = pcim_enable_device(pdev);
 	if (ret) {
@@ -208,6 +161,10 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto e_err;
 	}
 
+	ret = ccp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
 	pci_set_master(pdev);
 
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
@@ -245,6 +202,8 @@ static void ccp_pci_remove(struct pci_dev *pdev)
 
 	sp_destroy(sp);
 
+	ccp_free_irqs(sp);
+
 	dev_notice(dev, "disabled\n");
 }
 
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index 419e00ccf5329..04a1cc42dbb27 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -30,6 +30,7 @@
 
 struct ccp_platform {
 	int coherent;
+	unsigned int irq_count;
 };
 
 static const struct acpi_device_id ccp_acpi_match[];
@@ -59,49 +60,45 @@ static struct sp_dev_vdata *ccp_get_acpi_version(struct platform_device *pdev)
 	return NULL;
 }
 
-static int ccp_get_irq(struct ccp_device *ccp)
+static int ccp_get_irqs(struct sp_device *sp)
 {
-	struct device *dev = ccp->dev;
+	struct ccp_platform *ccp_platform = sp->dev_specific;
+	struct device *dev = sp->dev;
 	struct platform_device *pdev = to_platform_device(dev);
+	unsigned int i, count;
 	int ret;
 
+	for (i = 0, count = 0; i < pdev->num_resources; i++) {
+		struct resource *res = &pdev->resource[i];
+
+		if (resource_type(res) == IORESOURCE_IRQ)
+			count++;
+	}
+
+	ccp_platform->irq_count = count;
+
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0) {
 		dev_notice(dev, "unable to get IRQ (%d)\n", ret);
 		return ret;
 	}
 
-	ccp->irq = ret;
-	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, ccp);
-	if (ret) {
-		dev_notice(dev, "unable to allocate IRQ (%d)\n", ret);
-		return ret;
+	sp->psp_irq = ret;
+	if (count == 1) {
+		sp->ccp_irq = ret;
+	} else {
+		ret = platform_get_irq(pdev, 1);
+		if (ret < 0) {
+			dev_notice(dev, "unable to get IRQ (%d)\n", ret);
+			return ret;
+		}
+
+		sp->ccp_irq = ret;
 	}
 
 	return 0;
 }
 
-static int ccp_get_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	int ret;
-
-	ret = ccp_get_irq(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get an interrupt */
-	dev_notice(dev, "could not enable interrupts (%d)\n", ret);
-
-	return ret;
-}
-
-static void ccp_free_irqs(struct ccp_device *ccp)
-{
-	free_irq(ccp->irq, ccp);
-}
-
 static int ccp_platform_probe(struct platform_device *pdev)
 {
 	struct sp_device *sp;
@@ -128,8 +125,6 @@ static int ccp_platform_probe(struct platform_device *pdev)
 		dev_err(dev, "missing driver data\n");
 		goto e_err;
 	}
-	sp->get_irq = ccp_get_irqs;
-	sp->free_irq = ccp_free_irqs;
 
 	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sp->io_map = devm_ioremap_resource(dev, ior);
@@ -156,6 +151,10 @@ static int ccp_platform_probe(struct platform_device *pdev)
 		goto e_err;
 	}
 
+	ret = ccp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
 	dev_set_drvdata(dev, sp);
 
 	ret = sp_init(sp);
diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
index 63cc74ee6d2a0..7e30773443e93 100644
--- a/drivers/crypto/ccp/sp-dev.c
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -64,6 +64,113 @@ static void sp_del_device(struct sp_device *sp)
 	write_unlock_irqrestore(&sp_unit_lock, flags);
 }
 
+static irqreturn_t sp_irq_handler(int irq, void *data)
+{
+	struct sp_device *sp = data;
+
+	if (sp->ccp_irq_handler)
+		sp->ccp_irq_handler(irq, sp->ccp_irq_data);
+
+	if (sp->psp_irq_handler)
+		sp->psp_irq_handler(irq, sp->psp_irq_data);
+
+	return IRQ_HANDLED;
+}
+
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	int ret;
+
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) {
+		/* Need a common routine to manage all interrupts */
+		sp->ccp_irq_data = data;
+		sp->ccp_irq_handler = handler;
+
+		if (!sp->irq_registered) {
+			ret = request_irq(sp->ccp_irq, sp_irq_handler, 0,
+					  sp->name, sp);
+			if (ret)
+				return ret;
+
+			sp->irq_registered = true;
+		}
+	} else {
+		/* Each sub-device can manage it's own interrupt */
+		ret = request_irq(sp->ccp_irq, handler, 0, name, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	int ret;
+
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) {
+		/* Need a common routine to manage all interrupts */
+		sp->psp_irq_data = data;
+		sp->psp_irq_handler = handler;
+
+		if (!sp->irq_registered) {
+			ret = request_irq(sp->psp_irq, sp_irq_handler, 0,
+					  sp->name, sp);
+			if (ret)
+				return ret;
+
+			sp->irq_registered = true;
+		}
+	} else {
+		/* Each sub-device can manage it's own interrupt */
+		ret = request_irq(sp->psp_irq, handler, 0, name, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void sp_free_ccp_irq(struct sp_device *sp, void *data)
+{
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) {
+		/* Using common routine to manage all interrupts */
+		if (!sp->psp_irq_handler) {
+			/* Nothing else using it, so free it */
+			free_irq(sp->ccp_irq, sp);
+
+			sp->irq_registered = false;
+		}
+
+		sp->ccp_irq_handler = NULL;
+		sp->ccp_irq_data = NULL;
+	} else {
+		/* Each sub-device can manage it's own interrupt */
+		free_irq(sp->ccp_irq, data);
+	}
+}
+
+void sp_free_psp_irq(struct sp_device *sp, void *data)
+{
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) {
+		/* Using common routine to manage all interrupts */
+		if (!sp->ccp_irq_handler) {
+			/* Nothing else using it, so free it */
+			free_irq(sp->psp_irq, sp);
+
+			sp->irq_registered = false;
+		}
+
+		sp->psp_irq_handler = NULL;
+		sp->psp_irq_data = NULL;
+	} else {
+		/* Each sub-device can manage it's own interrupt */
+		free_irq(sp->psp_irq, data);
+	}
+}
+
 /**
  * sp_alloc_struct - allocate and initialize the sp_device struct
  *
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 189e57eeb4fab..3520da4e20cfa 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -68,9 +68,15 @@ struct sp_device {
 	unsigned int axcache;
 
 	bool irq_registered;
+	bool use_tasklet;
 
-	int (*get_irq)(struct ccp_device *ccp);
-	void (*free_irq)(struct ccp_device *ccp);
+	unsigned int ccp_irq;
+	irq_handler_t ccp_irq_handler;
+	void *ccp_irq_data;
+
+	unsigned int psp_irq;
+	irq_handler_t psp_irq_handler;
+	void *psp_irq_data;
 
 	void *ccp_data;
 	void *psp_data;
@@ -90,6 +96,12 @@ struct sp_device *sp_get_master(void);
 
 int sp_suspend(struct sp_device *sp, pm_message_t state);
 int sp_resume(struct sp_device *sp);
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_ccp_irq(struct sp_device *sp, void *data);
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_psp_irq(struct sp_device *sp, void *data);
 
 #ifdef CONFIG_CRYPTO_DEV_SP_CCP
 

From d0ebbc0c407a10485a8672ef370dfe55c666d57f Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 6 Jul 2017 09:59:16 -0500
Subject: [PATCH 020/116] crypto: ccp - rename ccp driver initialize files as
 sp device

CCP device initializes is now integerated into higher level SP device,
to avoid the confusion lets rename the ccp driver initialization files
(ccp-platform.c->sp-platform.c, ccp-pci.c->sp-pci.c). The patch does not
make any functional changes other than renaming file and structures

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/Makefile                   |  4 +-
 drivers/crypto/ccp/ccp-dev.h                  |  6 --
 drivers/crypto/ccp/sp-dev.c                   | 12 +--
 drivers/crypto/ccp/{ccp-pci.c => sp-pci.c}    | 80 +++++++++----------
 .../ccp/{ccp-platform.c => sp-platform.c}     | 78 +++++++++---------
 5 files changed, 87 insertions(+), 93 deletions(-)
 rename drivers/crypto/ccp/{ccp-pci.c => sp-pci.c} (71%)
 rename drivers/crypto/ccp/{ccp-platform.c => sp-platform.c} (67%)

diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index d2f1b5264362a..5f2adc5facfe8 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -1,12 +1,12 @@
 obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
-ccp-objs  := sp-dev.o ccp-platform.o
+ccp-objs  := sp-dev.o sp-platform.o
 ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
 	    ccp-ops.o \
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
 	    ccp-dmaengine.o \
 	    ccp-debugfs.o
-ccp-$(CONFIG_PCI) += ccp-pci.o
+ccp-$(CONFIG_PCI) += sp-pci.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
 ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 193f309736944..b959e2402aa2c 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -626,12 +626,6 @@ struct ccp5_desc {
 	struct dword7 dw7;
 };
 
-int ccp_pci_init(void);
-void ccp_pci_exit(void);
-
-int ccp_platform_init(void);
-void ccp_platform_exit(void);
-
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 
diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
index 7e30773443e93..1e5ffad4437bd 100644
--- a/drivers/crypto/ccp/sp-dev.c
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -242,13 +242,13 @@ static int __init sp_mod_init(void)
 #ifdef CONFIG_X86
 	int ret;
 
-	ret = ccp_pci_init();
+	ret = sp_pci_init();
 	if (ret)
 		return ret;
 
 	/* Don't leave the driver loaded if init failed */
 	if (ccp_present() != 0) {
-		ccp_pci_exit();
+		sp_pci_exit();
 		return -ENODEV;
 	}
 
@@ -258,13 +258,13 @@ static int __init sp_mod_init(void)
 #ifdef CONFIG_ARM64
 	int ret;
 
-	ret = ccp_platform_init();
+	ret = sp_platform_init();
 	if (ret)
 		return ret;
 
 	/* Don't leave the driver loaded if init failed */
 	if (ccp_present() != 0) {
-		ccp_platform_exit();
+		sp_platform_exit();
 		return -ENODEV;
 	}
 
@@ -277,11 +277,11 @@ static int __init sp_mod_init(void)
 static void __exit sp_mod_exit(void)
 {
 #ifdef CONFIG_X86
-	ccp_pci_exit();
+	sp_pci_exit();
 #endif
 
 #ifdef CONFIG_ARM64
-	ccp_platform_exit();
+	sp_platform_exit();
 #endif
 }
 
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/sp-pci.c
similarity index 71%
rename from drivers/crypto/ccp/ccp-pci.c
rename to drivers/crypto/ccp/sp-pci.c
index b29a093fd1272..9859aa683a283 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -1,5 +1,5 @@
 /*
- * AMD Cryptographic Coprocessor (CCP) driver
+ * AMD Secure Processor device driver
  *
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
@@ -28,35 +28,35 @@
 
 #define MSIX_VECTORS			2
 
-struct ccp_pci {
+struct sp_pci {
 	int msix_count;
 	struct msix_entry msix_entry[MSIX_VECTORS];
 };
 
-static int ccp_get_msix_irqs(struct sp_device *sp)
+static int sp_get_msix_irqs(struct sp_device *sp)
 {
-	struct ccp_pci *ccp_pci = sp->dev_specific;
+	struct sp_pci *sp_pci = sp->dev_specific;
 	struct device *dev = sp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int v, ret;
 
-	for (v = 0; v < ARRAY_SIZE(ccp_pci->msix_entry); v++)
-		ccp_pci->msix_entry[v].entry = v;
+	for (v = 0; v < ARRAY_SIZE(sp_pci->msix_entry); v++)
+		sp_pci->msix_entry[v].entry = v;
 
-	ret = pci_enable_msix_range(pdev, ccp_pci->msix_entry, 1, v);
+	ret = pci_enable_msix_range(pdev, sp_pci->msix_entry, 1, v);
 	if (ret < 0)
 		return ret;
 
-	ccp_pci->msix_count = ret;
+	sp_pci->msix_count = ret;
 	sp->use_tasklet = true;
 
-	sp->psp_irq = ccp_pci->msix_entry[0].vector;
-	sp->ccp_irq = (ccp_pci->msix_count > 1) ? ccp_pci->msix_entry[1].vector
-					       : ccp_pci->msix_entry[0].vector;
+	sp->psp_irq = sp_pci->msix_entry[0].vector;
+	sp->ccp_irq = (sp_pci->msix_count > 1) ? sp_pci->msix_entry[1].vector
+					       : sp_pci->msix_entry[0].vector;
 	return 0;
 }
 
-static int ccp_get_msi_irq(struct sp_device *sp)
+static int sp_get_msi_irq(struct sp_device *sp)
 {
 	struct device *dev = sp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -72,18 +72,18 @@ static int ccp_get_msi_irq(struct sp_device *sp)
 	return 0;
 }
 
-static int ccp_get_irqs(struct sp_device *sp)
+static int sp_get_irqs(struct sp_device *sp)
 {
 	struct device *dev = sp->dev;
 	int ret;
 
-	ret = ccp_get_msix_irqs(sp);
+	ret = sp_get_msix_irqs(sp);
 	if (!ret)
 		return 0;
 
 	/* Couldn't get MSI-X vectors, try MSI */
 	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
-	ret = ccp_get_msi_irq(sp);
+	ret = sp_get_msi_irq(sp);
 	if (!ret)
 		return 0;
 
@@ -93,13 +93,13 @@ static int ccp_get_irqs(struct sp_device *sp)
 	return ret;
 }
 
-static void ccp_free_irqs(struct sp_device *sp)
+static void sp_free_irqs(struct sp_device *sp)
 {
-	struct ccp_pci *ccp_pci = sp->dev_specific;
+	struct sp_pci *sp_pci = sp->dev_specific;
 	struct device *dev = sp->dev;
 	struct pci_dev *pdev = to_pci_dev(dev);
 
-	if (ccp_pci->msix_count)
+	if (sp_pci->msix_count)
 		pci_disable_msix(pdev);
 	else if (sp->psp_irq)
 		pci_disable_msi(pdev);
@@ -108,10 +108,10 @@ static void ccp_free_irqs(struct sp_device *sp)
 	sp->psp_irq = 0;
 }
 
-static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct sp_device *sp;
-	struct ccp_pci *ccp_pci;
+	struct sp_pci *sp_pci;
 	struct device *dev = &pdev->dev;
 	void __iomem * const *iomap_table;
 	int bar_mask;
@@ -122,11 +122,11 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!sp)
 		goto e_err;
 
-	ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
-	if (!ccp_pci)
+	sp_pci = devm_kzalloc(dev, sizeof(*sp_pci), GFP_KERNEL);
+	if (!sp_pci)
 		goto e_err;
 
-	sp->dev_specific = ccp_pci;
+	sp->dev_specific = sp_pci;
 	sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data;
 	if (!sp->dev_vdata) {
 		ret = -ENODEV;
@@ -161,7 +161,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto e_err;
 	}
 
-	ret = ccp_get_irqs(sp);
+	ret = sp_get_irqs(sp);
 	if (ret)
 		goto e_err;
 
@@ -192,7 +192,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return ret;
 }
 
-static void ccp_pci_remove(struct pci_dev *pdev)
+static void sp_pci_remove(struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct sp_device *sp = dev_get_drvdata(dev);
@@ -202,13 +202,13 @@ static void ccp_pci_remove(struct pci_dev *pdev)
 
 	sp_destroy(sp);
 
-	ccp_free_irqs(sp);
+	sp_free_irqs(sp);
 
 	dev_notice(dev, "disabled\n");
 }
 
 #ifdef CONFIG_PM
-static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+static int sp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct device *dev = &pdev->dev;
 	struct sp_device *sp = dev_get_drvdata(dev);
@@ -216,7 +216,7 @@ static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	return sp_suspend(sp, state);
 }
 
-static int ccp_pci_resume(struct pci_dev *pdev)
+static int sp_pci_resume(struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct sp_device *sp = dev_get_drvdata(dev);
@@ -245,32 +245,32 @@ static const struct sp_dev_vdata dev_vdata[] = {
 #endif
 	},
 };
-static const struct pci_device_id ccp_pci_table[] = {
+static const struct pci_device_id sp_pci_table[] = {
 	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
 	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] },
 	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] },
 	/* Last entry must be zero */
 	{ 0, }
 };
-MODULE_DEVICE_TABLE(pci, ccp_pci_table);
+MODULE_DEVICE_TABLE(pci, sp_pci_table);
 
-static struct pci_driver ccp_pci_driver = {
+static struct pci_driver sp_pci_driver = {
 	.name = "ccp",
-	.id_table = ccp_pci_table,
-	.probe = ccp_pci_probe,
-	.remove = ccp_pci_remove,
+	.id_table = sp_pci_table,
+	.probe = sp_pci_probe,
+	.remove = sp_pci_remove,
 #ifdef CONFIG_PM
-	.suspend = ccp_pci_suspend,
-	.resume = ccp_pci_resume,
+	.suspend = sp_pci_suspend,
+	.resume = sp_pci_resume,
 #endif
 };
 
-int ccp_pci_init(void)
+int sp_pci_init(void)
 {
-	return pci_register_driver(&ccp_pci_driver);
+	return pci_register_driver(&sp_pci_driver);
 }
 
-void ccp_pci_exit(void)
+void sp_pci_exit(void)
 {
-	pci_unregister_driver(&ccp_pci_driver);
+	pci_unregister_driver(&sp_pci_driver);
 }
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/sp-platform.c
similarity index 67%
rename from drivers/crypto/ccp/ccp-platform.c
rename to drivers/crypto/ccp/sp-platform.c
index 04a1cc42dbb27..71734f254fd15 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/sp-platform.c
@@ -1,5 +1,5 @@
 /*
- * AMD Cryptographic Coprocessor (CCP) driver
+ * AMD Secure Processor device driver
  *
  * Copyright (C) 2014,2016 Advanced Micro Devices, Inc.
  *
@@ -28,41 +28,41 @@
 
 #include "ccp-dev.h"
 
-struct ccp_platform {
+struct sp_platform {
 	int coherent;
 	unsigned int irq_count;
 };
 
-static const struct acpi_device_id ccp_acpi_match[];
-static const struct of_device_id ccp_of_match[];
+static const struct acpi_device_id sp_acpi_match[];
+static const struct of_device_id sp_of_match[];
 
-static struct sp_dev_vdata *ccp_get_of_version(struct platform_device *pdev)
+static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev)
 {
 #ifdef CONFIG_OF
 	const struct of_device_id *match;
 
-	match = of_match_node(ccp_of_match, pdev->dev.of_node);
+	match = of_match_node(sp_of_match, pdev->dev.of_node);
 	if (match && match->data)
 		return (struct sp_dev_vdata *)match->data;
 #endif
 	return NULL;
 }
 
-static struct sp_dev_vdata *ccp_get_acpi_version(struct platform_device *pdev)
+static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev)
 {
 #ifdef CONFIG_ACPI
 	const struct acpi_device_id *match;
 
-	match = acpi_match_device(ccp_acpi_match, &pdev->dev);
+	match = acpi_match_device(sp_acpi_match, &pdev->dev);
 	if (match && match->driver_data)
 		return (struct sp_dev_vdata *)match->driver_data;
 #endif
 	return NULL;
 }
 
-static int ccp_get_irqs(struct sp_device *sp)
+static int sp_get_irqs(struct sp_device *sp)
 {
-	struct ccp_platform *ccp_platform = sp->dev_specific;
+	struct sp_platform *sp_platform = sp->dev_specific;
 	struct device *dev = sp->dev;
 	struct platform_device *pdev = to_platform_device(dev);
 	unsigned int i, count;
@@ -75,7 +75,7 @@ static int ccp_get_irqs(struct sp_device *sp)
 			count++;
 	}
 
-	ccp_platform->irq_count = count;
+	sp_platform->irq_count = count;
 
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0) {
@@ -99,10 +99,10 @@ static int ccp_get_irqs(struct sp_device *sp)
 	return 0;
 }
 
-static int ccp_platform_probe(struct platform_device *pdev)
+static int sp_platform_probe(struct platform_device *pdev)
 {
 	struct sp_device *sp;
-	struct ccp_platform *ccp_platform;
+	struct sp_platform *sp_platform;
 	struct device *dev = &pdev->dev;
 	enum dev_dma_attr attr;
 	struct resource *ior;
@@ -113,13 +113,13 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	if (!sp)
 		goto e_err;
 
-	ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
-	if (!ccp_platform)
+	sp_platform = devm_kzalloc(dev, sizeof(*sp_platform), GFP_KERNEL);
+	if (!sp_platform)
 		goto e_err;
 
-	sp->dev_specific = ccp_platform;
-	sp->dev_vdata = pdev->dev.of_node ? ccp_get_of_version(pdev)
-					 : ccp_get_acpi_version(pdev);
+	sp->dev_specific = sp_platform;
+	sp->dev_vdata = pdev->dev.of_node ? sp_get_of_version(pdev)
+					 : sp_get_acpi_version(pdev);
 	if (!sp->dev_vdata) {
 		ret = -ENODEV;
 		dev_err(dev, "missing driver data\n");
@@ -139,8 +139,8 @@ static int ccp_platform_probe(struct platform_device *pdev)
 		goto e_err;
 	}
 
-	ccp_platform->coherent = (attr == DEV_DMA_COHERENT);
-	if (ccp_platform->coherent)
+	sp_platform->coherent = (attr == DEV_DMA_COHERENT);
+	if (sp_platform->coherent)
 		sp->axcache = CACHE_WB_NO_ALLOC;
 	else
 		sp->axcache = CACHE_NONE;
@@ -151,7 +151,7 @@ static int ccp_platform_probe(struct platform_device *pdev)
 		goto e_err;
 	}
 
-	ret = ccp_get_irqs(sp);
+	ret = sp_get_irqs(sp);
 	if (ret)
 		goto e_err;
 
@@ -170,7 +170,7 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int ccp_platform_remove(struct platform_device *pdev)
+static int sp_platform_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct sp_device *sp = dev_get_drvdata(dev);
@@ -183,7 +183,7 @@ static int ccp_platform_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int ccp_platform_suspend(struct platform_device *pdev,
+static int sp_platform_suspend(struct platform_device *pdev,
 				pm_message_t state)
 {
 	struct device *dev = &pdev->dev;
@@ -192,7 +192,7 @@ static int ccp_platform_suspend(struct platform_device *pdev,
 	return sp_suspend(sp, state);
 }
 
-static int ccp_platform_resume(struct platform_device *pdev)
+static int sp_platform_resume(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct sp_device *sp = dev_get_drvdata(dev);
@@ -211,46 +211,46 @@ static const struct sp_dev_vdata dev_vdata[] = {
 };
 
 #ifdef CONFIG_ACPI
-static const struct acpi_device_id ccp_acpi_match[] = {
+static const struct acpi_device_id sp_acpi_match[] = {
 	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
 	{ },
 };
-MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
+MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
 #endif
 
 #ifdef CONFIG_OF
-static const struct of_device_id ccp_of_match[] = {
+static const struct of_device_id sp_of_match[] = {
 	{ .compatible = "amd,ccp-seattle-v1a",
 	  .data = (const void *)&dev_vdata[0] },
 	{ },
 };
-MODULE_DEVICE_TABLE(of, ccp_of_match);
+MODULE_DEVICE_TABLE(of, sp_of_match);
 #endif
 
-static struct platform_driver ccp_platform_driver = {
+static struct platform_driver sp_platform_driver = {
 	.driver = {
 		.name = "ccp",
 #ifdef CONFIG_ACPI
-		.acpi_match_table = ccp_acpi_match,
+		.acpi_match_table = sp_acpi_match,
 #endif
 #ifdef CONFIG_OF
-		.of_match_table = ccp_of_match,
+		.of_match_table = sp_of_match,
 #endif
 	},
-	.probe = ccp_platform_probe,
-	.remove = ccp_platform_remove,
+	.probe = sp_platform_probe,
+	.remove = sp_platform_remove,
 #ifdef CONFIG_PM
-	.suspend = ccp_platform_suspend,
-	.resume = ccp_platform_resume,
+	.suspend = sp_platform_suspend,
+	.resume = sp_platform_resume,
 #endif
 };
 
-int ccp_platform_init(void)
+int sp_platform_init(void)
 {
-	return platform_driver_register(&ccp_platform_driver);
+	return platform_driver_register(&sp_platform_driver);
 }
 
-void ccp_platform_exit(void)
+void sp_platform_exit(void)
 {
-	platform_driver_unregister(&ccp_platform_driver);
+	platform_driver_unregister(&sp_platform_driver);
 }

From 57de3aefb73fec7cebaf5c6f5f47ef6be4416c12 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 6 Jul 2017 09:59:17 -0500
Subject: [PATCH 021/116] crypto: ccp - remove ccp_present() check from device
 initialize

Since SP device driver supports multiples devices (e.g CCP, PSP), we
should not fail the driver init just because CCP device is not found.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sp-dev.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
index 1e5ffad4437bd..bef387c8abfd7 100644
--- a/drivers/crypto/ccp/sp-dev.c
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -246,12 +246,6 @@ static int __init sp_mod_init(void)
 	if (ret)
 		return ret;
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		sp_pci_exit();
-		return -ENODEV;
-	}
-
 	return 0;
 #endif
 
@@ -262,12 +256,6 @@ static int __init sp_mod_init(void)
 	if (ret)
 		return ret;
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		sp_platform_exit();
-		return -ENODEV;
-	}
-
 	return 0;
 #endif
 

From 24086e3d44e1282e64f4bb4801838e554f43a709 Mon Sep 17 00:00:00 2001
From: Chris Gorman <chrisjohgorman@gmail.com>
Date: Thu, 6 Jul 2017 14:44:56 -0400
Subject: [PATCH 022/116] crypto: geode-aes - fixed coding style warnings and
 error

fixed WARNING: Block comments should align the * on each line
fixed WARNINGs: Missing a blank line after declarations
fixed ERROR: space prohibited before that ',' (ctx:WxE)

Signed-off-by: Chris Gorman <chrisjohgorman@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/geode-aes.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c
index fe538e5287a55..eb2a0a73cbed1 100644
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c
@@ -1,10 +1,10 @@
  /* Copyright (C) 2004-2006, Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation; either version 2 of the License, or
+  * (at your option) any later version.
+  */
 
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -30,6 +30,7 @@ static inline void
 _writefield(u32 offset, void *value)
 {
 	int i;
+
 	for (i = 0; i < 4; i++)
 		iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4));
 }
@@ -39,6 +40,7 @@ static inline void
 _readfield(u32 offset, void *value)
 {
 	int i;
+
 	for (i = 0; i < 4; i++)
 		((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4));
 }
@@ -515,6 +517,7 @@ static void geode_aes_remove(struct pci_dev *dev)
 static int geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	int ret;
+
 	ret = pci_enable_device(dev);
 	if (ret)
 		return ret;
@@ -570,7 +573,7 @@ static int geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
 }
 
 static struct pci_device_id geode_aes_tbl[] = {
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_LX_AES), } ,
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_LX_AES), },
 	{ 0, }
 };
 

From c6090480b27ac7148bf4087f2112ed9c05f34c2b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 7 Jul 2017 01:33:33 -0500
Subject: [PATCH 023/116] crypto: brcm - add NULL check on of_match_device()
 return value

Check return value from call to of_match_device()
in order to prevent a NULL pointer dereference.

In case of NULL print error message and return -ENODEV

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/bcm/cipher.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 9cfd36c1bcb63..f1a826f97fec6 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -4813,6 +4813,11 @@ static int spu_dt_read(struct platform_device *pdev)
 	int err;
 
 	match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Failed to match device\n");
+		return -ENODEV;
+	}
+
 	matched_spu_type = match->data;
 
 	if (iproc_priv.spu.num_spu > 1) {

From 84ea95436b83884fa55780618ffaf4bbe3312166 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:27 +0300
Subject: [PATCH 024/116] crypto: caam/qi - fix typo in authenc alg driver name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

s/desi/des for echainiv(authenc(hmac(sha256),cbc(des))) alg.

Cc: <stable@vger.kernel.org>
Fixes: b189817cf7894 ("crypto: caam/qi - add ablkcipher and authenc algorithms")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg_qi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 78c4c0485c582..a614ea998a876 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -1968,7 +1968,7 @@ static struct caam_aead_alg driver_aeads[] = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(des)))",
 				.cra_driver_name = "echainiv-authenc-"
-						   "hmac-sha256-cbc-desi-"
+						   "hmac-sha256-cbc-des-"
 						   "caam-qi",
 				.cra_blocksize = DES_BLOCK_SIZE,
 			},

From 972b812bd1e17cb0a9112f565951795f886fcc94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:28 +0300
Subject: [PATCH 025/116] crypto: caam/qi - fix compilation with DEBUG enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

caam/qi driver does not compile when DEBUG is enabled
(CRYPTO_DEV_FSL_CAAM_DEBUG=y):

drivers/crypto/caam/caamalg_qi.c: In function 'ablkcipher_done':
drivers/crypto/caam/caamalg_qi.c:794:2: error: implicit declaration of function 'dbg_dump_sg' [-Werror=implicit-function-declaration]
  dbg_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",

Since dbg_dump_sg() is shared between caam/jr and caam/qi, move it
in a shared location and export it.

At the same time:
-reduce ifdeferry by providing a no-op implementation for !DEBUG case
-rename it to caam_dump_sg() to be consistent in terms of
exported symbols namespace (caam_*)

Cc: <stable@vger.kernel.org>
Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg.c    | 66 ++++++++------------------------
 drivers/crypto/caam/caamalg_qi.c |  6 +--
 drivers/crypto/caam/error.c      | 40 +++++++++++++++++++
 drivers/crypto/caam/error.h      |  4 ++
 4 files changed, 62 insertions(+), 54 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 0488b7f81dcf3..54f3b375a453b 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -81,40 +81,6 @@
 #define debug(format, arg...)
 #endif
 
-#ifdef DEBUG
-#include <linux/highmem.h>
-
-static void dbg_dump_sg(const char *level, const char *prefix_str,
-			int prefix_type, int rowsize, int groupsize,
-			struct scatterlist *sg, size_t tlen, bool ascii)
-{
-	struct scatterlist *it;
-	void *it_page;
-	size_t len;
-	void *buf;
-
-	for (it = sg; it != NULL && tlen > 0 ; it = sg_next(sg)) {
-		/*
-		 * make sure the scatterlist's page
-		 * has a valid virtual memory mapping
-		 */
-		it_page = kmap_atomic(sg_page(it));
-		if (unlikely(!it_page)) {
-			printk(KERN_ERR "dbg_dump_sg: kmap failed\n");
-			return;
-		}
-
-		buf = it_page + it->offset;
-		len = min_t(size_t, tlen, it->length);
-		print_hex_dump(level, prefix_str, prefix_type, rowsize,
-			       groupsize, buf, len, ascii);
-		tlen -= len;
-
-		kunmap_atomic(it_page);
-	}
-}
-#endif
-
 static struct list_head alg_list;
 
 struct caam_alg_entry {
@@ -898,10 +864,10 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 	ablkcipher_unmap(jrdev, edesc, req);
 
@@ -937,10 +903,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 	ablkcipher_unmap(jrdev, edesc, req);
 
@@ -1107,10 +1073,10 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 		       ivsize, 1);
 	pr_err("asked=%d, nbytes%d\n",
 	       (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes);
-	dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
@@ -1164,10 +1130,10 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
 	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
@@ -1449,11 +1415,9 @@ static int aead_decrypt(struct aead_request *req)
 	u32 *desc;
 	int ret = 0;
 
-#ifdef DEBUG
-	dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    req->assoclen + req->cryptlen, 1);
-#endif
+	caam_dump_sg(KERN_ERR, "dec src@" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     req->assoclen + req->cryptlen, 1);
 
 	/* allocate extended descriptor */
 	edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index a614ea998a876..058be8f094ee1 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -791,9 +791,9 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	print_hex_dump(KERN_ERR, "dstiv  @" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 
 	ablkcipher_unmap(qidev, edesc, req);
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 6f44ccb55c632..3d639f3b45aa0 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -9,6 +9,46 @@
 #include "desc.h"
 #include "error.h"
 
+#ifdef DEBUG
+#include <linux/highmem.h>
+
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii)
+{
+	struct scatterlist *it;
+	void *it_page;
+	size_t len;
+	void *buf;
+
+	for (it = sg; it && tlen > 0 ; it = sg_next(sg)) {
+		/*
+		 * make sure the scatterlist's page
+		 * has a valid virtual memory mapping
+		 */
+		it_page = kmap_atomic(sg_page(it));
+		if (unlikely(!it_page)) {
+			pr_err("caam_dump_sg: kmap failed\n");
+			return;
+		}
+
+		buf = it_page + it->offset;
+		len = min_t(size_t, tlen, it->length);
+		print_hex_dump(level, prefix_str, prefix_type, rowsize,
+			       groupsize, buf, len, ascii);
+		tlen -= len;
+
+		kunmap_atomic(it_page);
+	}
+}
+#else
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii)
+{}
+#endif /* DEBUG */
+EXPORT_SYMBOL(caam_dump_sg);
+
 static const struct {
 	u8 value;
 	const char *error_text;
diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h
index b6350b0d91539..250e1a21c473e 100644
--- a/drivers/crypto/caam/error.h
+++ b/drivers/crypto/caam/error.h
@@ -8,4 +8,8 @@
 #define CAAM_ERROR_H
 #define CAAM_ERROR_STR_MAX 302
 void caam_jr_strstatus(struct device *jrdev, u32 status);
+
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii);
 #endif /* CAAM_ERROR_H */

From 1ed289f7b78c34565a33dbe6f8c482e71f493934 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:29 +0300
Subject: [PATCH 026/116] crypto: caam/qi - fix compilation with
 CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

caam/qi driver fails to compile when CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y.
Fix it by making the offending local per_cpu variable global.

Cc: <stable@vger.kernel.org>
Fixes: 67c2315def06c ("crypto: caam - add Queue Interface (QI) backend support")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/qi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 1990ed460c463..53aed58164164 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -55,6 +55,7 @@ struct caam_qi_pcpu_priv {
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
+static DEFINE_PER_CPU(int, last_cpu);
 
 /*
  * caam_qi_priv - CAAM QI backend private params
@@ -392,7 +393,6 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	dma_addr_t hwdesc;
 	struct caam_drv_ctx *drv_ctx;
 	const cpumask_t *cpus = qman_affine_cpus();
-	static DEFINE_PER_CPU(int, last_cpu);
 
 	num_words = desc_len(sh_desc);
 	if (num_words > MAX_SDLEN) {

From a68a193805224d90bedd94e9e8ac287600f07b78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:30 +0300
Subject: [PATCH 027/116] crypto: caam/qi - properly set IV after {en,de}crypt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

caam/qi needs a fix similar to what was done for caam/jr in
commit "crypto: caam/qi - properly set IV after {en,de}crypt",
to allow for ablkcipher/skcipher chunking/streaming.

Cc: <stable@vger.kernel.org>
Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms")
Suggested-by: David Gstir <david@sigma-star.at>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg_qi.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 058be8f094ee1..fe0185ceac161 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -776,9 +776,9 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *caam_ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = caam_ctx->qidev;
-#ifdef DEBUG
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
+#ifdef DEBUG
 	dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
 #endif
 
@@ -799,6 +799,13 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	ablkcipher_unmap(qidev, edesc, req);
 	qi_cache_free(edesc);
 
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block. This is used e.g. by the CTS mode.
+	 */
+	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
+				 ivsize, 0);
+
 	ablkcipher_request_complete(req, status);
 }
 

From eb9ba37dc15a6e6b6140eb6d62785ba99b7179d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:31 +0300
Subject: [PATCH 028/116] crypto: caam/qi - handle large number of S/Gs case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For more than 16 S/G entries, driver currently corrupts memory
on ARMv8, see below KASAN log.
Note: this does not reproduce on PowerPC due to different (smaller)
cache line size - 64 bytes on PPC vs. 128 bytes on ARMv8.

One such use case is one of the cbc(aes) test vectors - with 8 S/G
entries and src != dst. Driver needs 1 (IV) + 2 x 8 = 17 entries,
which goes over the 16 S/G entries limit:
(CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) /
sizeof(struct qm_sg_entry) = 256 / 16 = 16 S/Gs

Fix this by:
-increasing object size in caamqicache pool from 512 to 768; this means
the maximum number of S/G entries grows from (at least) 16 to 32
(again, for ARMv8 case of 128-byte cache line)
-add checks in the driver to fail gracefully (ENOMEM) in case the 32 S/G
entries limit is exceeded

==================================================================
BUG: KASAN: slab-out-of-bounds in ablkcipher_edesc_alloc+0x4ec/0xf60
Write of size 1 at addr ffff800021cb6003 by task cryptomgr_test/1394

CPU: 3 PID: 1394 Comm: cryptomgr_test Not tainted 4.12.0-rc7-next-20170703-00023-g72badbcc1ea7-dirty #26
Hardware name: LS1046A RDB Board (DT)
Call trace:
[<ffff20000808ac6c>] dump_backtrace+0x0/0x290
[<ffff20000808b014>] show_stack+0x14/0x1c
[<ffff200008d62c00>] dump_stack+0xa4/0xc8
[<ffff200008264e40>] print_address_description+0x110/0x26c
[<ffff200008265224>] kasan_report+0x1d0/0x2fc
[<ffff2000082637b8>] __asan_store1+0x4c/0x54
[<ffff200008b4884c>] ablkcipher_edesc_alloc+0x4ec/0xf60
[<ffff200008b49304>] ablkcipher_encrypt+0x44/0xcc
[<ffff20000848a61c>] skcipher_encrypt_ablkcipher+0x120/0x138
[<ffff200008495014>] __test_skcipher+0xaec/0xe30
[<ffff200008497088>] test_skcipher+0x6c/0xd8
[<ffff200008497154>] alg_test_skcipher+0x60/0xe4
[<ffff2000084974c4>] alg_test.part.13+0x130/0x304
[<ffff2000084976d4>] alg_test+0x3c/0x68
[<ffff2000084938ac>] cryptomgr_test+0x54/0x5c
[<ffff20000810276c>] kthread+0x188/0x1c8
[<ffff2000080836c0>] ret_from_fork+0x10/0x50

Allocated by task 1394:
 save_stack_trace_tsk+0x0/0x1ac
 save_stack_trace+0x18/0x20
 kasan_kmalloc.part.5+0x48/0x110
 kasan_kmalloc+0x84/0xa0
 kasan_slab_alloc+0x14/0x1c
 kmem_cache_alloc+0x124/0x1e8
 qi_cache_alloc+0x28/0x58
 ablkcipher_edesc_alloc+0x244/0xf60
 ablkcipher_encrypt+0x44/0xcc
 skcipher_encrypt_ablkcipher+0x120/0x138
 __test_skcipher+0xaec/0xe30
 test_skcipher+0x6c/0xd8
 alg_test_skcipher+0x60/0xe4
 alg_test.part.13+0x130/0x304
 alg_test+0x3c/0x68
 cryptomgr_test+0x54/0x5c
 kthread+0x188/0x1c8
 ret_from_fork+0x10/0x50

Freed by task 0:
(stack is not available)

The buggy address belongs to the object at ffff800021cb5e00
 which belongs to the cache caamqicache of size 512
The buggy address is located 3 bytes to the right of
 512-byte region [ffff800021cb5e00, ffff800021cb6000)
The buggy address belongs to the page:
page:ffff7e0000872d00 count:1 mapcount:0 mapping:          (null)
index:0x0 compound_mapcount: 0
flags: 0xfffc00000008100(slab|head)
raw: 0fffc00000008100 0000000000000000 0000000000000000 0000000180190019
raw: dead000000000100 dead000000000200 ffff800931268200 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff800021cb5f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff800021cb5f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffff800021cb6000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                   ^
 ffff800021cb6080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff800021cb6100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================

Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg_qi.c | 32 +++++++++++++++++++++++++++++++-
 drivers/crypto/caam/qi.c         |  3 ---
 drivers/crypto/caam/qi.h         |  3 +++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index fe0185ceac161..85878f163cdda 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -411,6 +411,9 @@ struct aead_edesc {
 	dma_addr_t qm_sg_dma;
 	dma_addr_t assoclen_dma;
 	struct caam_drv_req drv_req;
+#define CAAM_QI_MAX_AEAD_SG						\
+	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) /	\
+	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 };
 
@@ -431,6 +434,9 @@ struct ablkcipher_edesc {
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
 	struct caam_drv_req drv_req;
+#define CAAM_QI_MAX_ABLKCIPHER_SG					    \
+	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \
+	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 };
 
@@ -660,6 +666,14 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	 */
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, op_type, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
 	sg_table = &edesc->sgt[0];
 	qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 
@@ -887,6 +901,15 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	}
 	dst_sg_idx = qm_sg_ents;
 
+	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, op_type, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* allocate space for base edesc and link tables */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (unlikely(!edesc)) {
@@ -899,7 +922,6 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
-	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	sg_table = &edesc->sgt[0];
 	edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 	edesc->drv_req.app_ctx = req;
@@ -1033,6 +1055,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 		qm_sg_ents += 1 + mapped_dst_nents;
 	}
 
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* allocate space for base edesc and link tables */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (!edesc) {
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 53aed58164164..01284faeee69a 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -24,9 +24,6 @@
  */
 #define MAX_RSP_FQ_BACKLOG_PER_CPU	256
 
-/* Length of a single buffer in the QI driver memory cache */
-#define CAAM_QI_MEMCACHE_SIZE	512
-
 #define CAAM_QI_ENQUEUE_RETRIES	10000
 
 #define CAAM_NAPI_WEIGHT	63
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index 33b0433f5f222..ecb21f207637b 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -39,6 +39,9 @@
  */
 #define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
 
+/* Length of a single buffer in the QI driver memory cache */
+#define CAAM_QI_MEMCACHE_SIZE	768
+
 extern bool caam_congested __read_mostly;
 
 /*

From 36cda08f98b1a97159ae8b3b23937d9c324e5437 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:32 +0300
Subject: [PATCH 029/116] crypto: caam/qi - fix AD length endianness in S/G
 entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Associated data (AD) length is read by CAAM from an S/G entry
that is initially filled by the GPP.
Accordingly, AD length has to be stored in CAAM endianness.

Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg_qi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 85878f163cdda..1fd83525deb46 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -399,6 +399,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
  * @iv_dma: dma address of iv for checking continuity and link table
  * @qm_sg_bytes: length of dma mapped h/w link table
  * @qm_sg_dma: bus physical mapped address of h/w link table
+ * @assoclen: associated data length, in CAAM endianness
  * @assoclen_dma: bus physical mapped address of req->assoclen
  * @drv_req: driver-specific request structure
  * @sgt: the h/w link table
@@ -409,6 +410,7 @@ struct aead_edesc {
 	dma_addr_t iv_dma;
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
+	unsigned int assoclen;
 	dma_addr_t assoclen_dma;
 	struct caam_drv_req drv_req;
 #define CAAM_QI_MAX_AEAD_SG						\
@@ -684,7 +686,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	edesc->drv_req.cbk = aead_done;
 	edesc->drv_req.drv_ctx = drv_ctx;
 
-	edesc->assoclen_dma = dma_map_single(qidev, &req->assoclen, 4,
+	edesc->assoclen = cpu_to_caam32(req->assoclen);
+	edesc->assoclen_dma = dma_map_single(qidev, &edesc->assoclen, 4,
 					     DMA_TO_DEVICE);
 	if (dma_mapping_error(qidev, edesc->assoclen_dma)) {
 		dev_err(qidev, "unable to map assoclen\n");

From 5747ff3091f7328a0c3e63eafedafe80d7953be8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:33 +0300
Subject: [PATCH 030/116] crypto: caam/qi - explicitly set dma_ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since ARM64 commit 1dccb598df549 ("arm64: simplify dma_get_ops"),
dma_ops no longer default to swiotlb_dma_ops, but to dummy_dma_ops.

dma_ops have to be explicitly set in the driver - at least for ARM64.

Fixes: 67c2315def06 ("crypto: caam - add Queue Interface (QI) backend support")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/qi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 01284faeee69a..1c1f3faf63941 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -734,6 +734,7 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	qi_pdev = platform_device_register_full(&qi_pdev_info);
 	if (IS_ERR(qi_pdev))
 		return PTR_ERR(qi_pdev);
+	set_dma_ops(&qi_pdev->dev, get_dma_ops(ctrldev));
 
 	ctrlpriv = dev_get_drvdata(ctrldev);
 	qidev = &qi_pdev->dev;

From b3b20461d887e08e049e6a2371faceda7c88195e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:34 +0300
Subject: [PATCH 031/116] crypto: caam/qi - remove unused header sg_sw_sec4.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sg_sw_sec4.h header is not used by caam/qi, thus remove its inclusion.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg_qi.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 1fd83525deb46..2eefc4a26bc2d 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -12,7 +12,6 @@
 #include "intern.h"
 #include "desc_constr.h"
 #include "error.h"
-#include "sg_sw_sec4.h"
 #include "sg_sw_qm.h"
 #include "key_gen.h"
 #include "qi.h"

From c7a91eb80b75513c67c0e0b347ac9e9605944341 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:35 +0300
Subject: [PATCH 032/116] crypto: caam/qi - lower driver verbosity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change log level for some prints from dev_info() to dev_dbg(), low-level
details are needed only when debugging.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/qi.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 1c1f3faf63941..6d5a010a1881b 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -201,8 +201,8 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
 		goto init_req_fq_fail;
 	}
 
-	dev_info(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
-		 smp_processor_id());
+	dev_dbg(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
+		smp_processor_id());
 	return req_fq;
 
 init_req_fq_fail:
@@ -643,7 +643,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
 
 	per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq;
 
-	dev_info(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
+	dev_dbg(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
 	return 0;
 }
 
@@ -676,7 +676,7 @@ static int init_cgr(struct device *qidev)
 		return ret;
 	}
 
-	dev_info(qidev, "Congestion threshold set to %llu\n", val);
+	dev_dbg(qidev, "Congestion threshold set to %llu\n", val);
 	return 0;
 }
 

From bcde1f78cab6741073c32edeecec96e4183d381b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:36 +0300
Subject: [PATCH 033/116] crypto: caam - remove unused sg_to_sec4_sg_len()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sg_to_sec4_sg_len() is no longer used since
commit 479bcc7c5b9e ("crypto: caam - Convert authenc to new AEAD interface")

Its functionality has been superseded by the usage of sg_nents_for_len()
returning the number of S/G entries corresponding to the provided length.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/sg_sw_sec4.h | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index c6adad09c9729..2f6bf162bb6c8 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -58,18 +58,3 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
 	sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
 	sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
 }
-
-static inline struct sec4_sg_entry *sg_to_sec4_sg_len(
-	struct scatterlist *sg, unsigned int total,
-	struct sec4_sg_entry *sec4_sg_ptr)
-{
-	do {
-		unsigned int len = min(sg_dma_len(sg), total);
-
-		dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), len, 0);
-		sec4_sg_ptr++;
-		sg = sg_next(sg);
-		total -= len;
-	} while (total);
-	return sec4_sg_ptr - 1;
-}

From 60a3f737badb0951dd27e93d24cb967dcc1fb855 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
Date: Mon, 10 Jul 2017 08:40:37 +0300
Subject: [PATCH 034/116] crypto: caam - remove unused variables in
 caam_drv_private
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/ctrl.c   | 1 -
 drivers/crypto/caam/intern.h | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index dd353e342c12b..7338f15b86741 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -444,7 +444,6 @@ static int caam_probe(struct platform_device *pdev)
 
 	dev = &pdev->dev;
 	dev_set_drvdata(dev, ctrlpriv);
-	ctrlpriv->pdev = pdev;
 	nprop = pdev->dev.of_node;
 
 	/* Enable clocking */
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 85b6c5835b8f4..9e3f3e0a7ffa1 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -64,12 +64,9 @@ struct caam_drv_private_jr {
  * Driver-private storage for a single CAAM block instance
  */
 struct caam_drv_private {
-
-	struct device *dev;
 #ifdef CONFIG_CAAM_QI
 	struct device *qidev;
 #endif
-	struct platform_device *pdev;
 
 	/* Physical-presence section */
 	struct caam_ctrl __iomem *ctrl; /* controller region */

From f366af462aef1dcaeab0f68b031e5c4c4eb860e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 10 Jul 2017 08:40:38 +0300
Subject: [PATCH 035/116] crypto: caam - clean-up in caam_init_rng()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clean up the code, as indicated by Coccinelle.

Cc: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamrng.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 41398da3edf44..fde07d4ff0190 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -285,11 +285,7 @@ static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
 	if (err)
 		return err;
 
-	err = caam_init_buf(ctx, 1);
-	if (err)
-		return err;
-
-	return 0;
+	return caam_init_buf(ctx, 1);
 }
 
 static struct hwrng caam_rng = {

From ad818080f69cbf064649d423e27af606c65e53ba Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
Date: Mon, 10 Jul 2017 08:40:39 +0300
Subject: [PATCH 036/116] crypto: caam - fix condition for the jump over key(s)
 command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SELF condition has no meaning for the SERIAL sharing since the jobs
are executed in the same DECO.

Signed-off-by: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg_desc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index 6f9c7ec0e3398..530c14ee32de3 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -599,7 +599,7 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 
 	/* skip key loading if they are loaded due to sharing */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD | JUMP_COND_SELF);
+				   JUMP_COND_SHRD);
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
 				  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
@@ -688,8 +688,7 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 
 	/* skip key loading if they are loaded due to sharing */
 	key_jump_cmd = append_jump(desc, JUMP_JSL |
-				   JUMP_TEST_ALL | JUMP_COND_SHRD |
-				   JUMP_COND_SELF);
+				   JUMP_TEST_ALL | JUMP_COND_SHRD);
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
 				  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);

From 2bbb6983887fefc8026beab01198d30f47b7bd22 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger <freude@linux.vnet.ibm.com>
Date: Tue, 11 Jul 2017 09:36:22 +0200
Subject: [PATCH 037/116] hwrng: use rng source with best quality

This patch rewoks the hwrng to always use the
rng source with best entropy quality.

On registation and unregistration the hwrng now
tries to choose the best (= highest quality value)
rng source. The handling of the internal list
of registered rng sources is now always sorted
by quality and the top most rng chosen.

Signed-off-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Reviewed-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 503a41dfa1936..e9dda162d4d2a 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -29,6 +29,7 @@
 
 static struct hwrng *current_rng;
 static struct task_struct *hwrng_fill;
+/* list of registered rngs, sorted decending by quality */
 static LIST_HEAD(rng_list);
 /* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
@@ -417,6 +418,7 @@ int hwrng_register(struct hwrng *rng)
 {
 	int err = -EINVAL;
 	struct hwrng *old_rng, *tmp;
+	struct list_head *rng_list_ptr;
 
 	if (!rng->name || (!rng->data_read && !rng->read))
 		goto out;
@@ -432,14 +434,25 @@ int hwrng_register(struct hwrng *rng)
 	init_completion(&rng->cleanup_done);
 	complete(&rng->cleanup_done);
 
+	/* rng_list is sorted by decreasing quality */
+	list_for_each(rng_list_ptr, &rng_list) {
+		tmp = list_entry(rng_list_ptr, struct hwrng, list);
+		if (tmp->quality < rng->quality)
+			break;
+	}
+	list_add_tail(&rng->list, rng_list_ptr);
+
 	old_rng = current_rng;
 	err = 0;
-	if (!old_rng) {
+	if (!old_rng || (rng->quality > old_rng->quality)) {
+		/*
+		 * Set new rng as current as the new rng source
+		 * provides better entropy quality.
+		 */
 		err = set_current_rng(rng);
 		if (err)
 			goto out_unlock;
 	}
-	list_add_tail(&rng->list, &rng_list);
 
 	if (old_rng && !rng->init) {
 		/*
@@ -466,12 +479,12 @@ void hwrng_unregister(struct hwrng *rng)
 	list_del(&rng->list);
 	if (current_rng == rng) {
 		drop_current_rng();
+		/* rng_list is sorted by quality, use the best (=first) one */
 		if (!list_empty(&rng_list)) {
-			struct hwrng *tail;
-
-			tail = list_entry(rng_list.prev, struct hwrng, list);
+			struct hwrng *new_rng;
 
-			set_current_rng(tail);
+			new_rng = list_entry(rng_list.next, struct hwrng, list);
+			set_current_rng(new_rng);
 		}
 	}
 

From 10a515ddb5f19a1ff0b9882c430b4427843169f3 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger <freude@linux.vnet.ibm.com>
Date: Tue, 11 Jul 2017 09:36:23 +0200
Subject: [PATCH 038/116] hwrng: remember rng chosen by user

When a user chooses a rng source via sysfs attribute
this rng should be sticky, even when other sources
with better quality to register. This patch introduces
a simple way to remember the user's choice. This is
reflected by a new sysfs attribute file 'rng_selected'
which shows if the current rng has been chosen by
userspace. The new attribute file shows '1' for user
selected rng and '0' otherwise.

Signed-off-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Reviewed-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index e9dda162d4d2a..9701ac7d8b471 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -28,6 +28,8 @@
 #define RNG_MODULE_NAME		"hw_random"
 
 static struct hwrng *current_rng;
+/* the current rng has been explicitly chosen by user via sysfs */
+static int cur_rng_set_by_user;
 static struct task_struct *hwrng_fill;
 /* list of registered rngs, sorted decending by quality */
 static LIST_HEAD(rng_list);
@@ -304,6 +306,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 	list_for_each_entry(rng, &rng_list, list) {
 		if (sysfs_streq(rng->name, buf)) {
 			err = 0;
+			cur_rng_set_by_user = 1;
 			if (rng != current_rng)
 				err = set_current_rng(rng);
 			break;
@@ -352,16 +355,27 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
 	return strlen(buf);
 }
 
+static ssize_t hwrng_attr_selected_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", cur_rng_set_by_user);
+}
+
 static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
 		   hwrng_attr_current_show,
 		   hwrng_attr_current_store);
 static DEVICE_ATTR(rng_available, S_IRUGO,
 		   hwrng_attr_available_show,
 		   NULL);
+static DEVICE_ATTR(rng_selected, S_IRUGO,
+		   hwrng_attr_selected_show,
+		   NULL);
 
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_available.attr,
+	&dev_attr_rng_selected.attr,
 	NULL
 };
 
@@ -444,10 +458,12 @@ int hwrng_register(struct hwrng *rng)
 
 	old_rng = current_rng;
 	err = 0;
-	if (!old_rng || (rng->quality > old_rng->quality)) {
+	if (!old_rng ||
+	    (!cur_rng_set_by_user && rng->quality > old_rng->quality)) {
 		/*
 		 * Set new rng as current as the new rng source
-		 * provides better entropy quality.
+		 * provides better entropy quality and was not
+		 * chosen by userspace.
 		 */
 		err = set_current_rng(rng);
 		if (err)
@@ -479,6 +495,7 @@ void hwrng_unregister(struct hwrng *rng)
 	list_del(&rng->list);
 	if (current_rng == rng) {
 		drop_current_rng();
+		cur_rng_set_by_user = 0;
 		/* rng_list is sorted by quality, use the best (=first) one */
 		if (!list_empty(&rng_list)) {
 			struct hwrng *new_rng;

From e870456d8e7c8d57c059ea479b5aadbb55ff4c3a Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Sun, 25 Jun 2017 17:12:39 +0200
Subject: [PATCH 039/116] crypto: algif_skcipher - overhaul memory management

The updated memory management is described in the top part of the code.
As one benefit of the changed memory management, the AIO and synchronous
operation is now implemented in one common function. The AF_ALG
operation uses the async kernel crypto API interface for each cipher
operation. Thus, the only difference between the AIO and sync operation
types visible from user space is:

1. the callback function to be invoked when the asynchronous operation
   is completed

2. whether to wait for the completion of the kernel crypto API operation
   or not

In addition, the code structure is adjusted to match the structure of
algif_aead for easier code assessment.

The user space interface changed slightly as follows: the old AIO
operation returned zero upon success and < 0 in case of an error to user
space. As all other AF_ALG interfaces (including the sync skcipher
interface) returned the number of processed bytes upon success and < 0
in case of an error, the new skcipher interface (regardless of AIO or
sync) returns the number of processed bytes in case of success.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algif_skcipher.c | 561 ++++++++++++++++++++--------------------
 1 file changed, 279 insertions(+), 282 deletions(-)

diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 43839b00fe6c4..968d094f0bcc0 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -10,6 +10,21 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request is released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
 
 #include <crypto/scatterwalk.h>
@@ -24,109 +39,94 @@
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct skcipher_sg_list {
+struct skcipher_tsgl {
 	struct list_head list;
-
 	int cur;
-
 	struct scatterlist sg[0];
 };
 
+struct skcipher_rsgl {
+	struct af_alg_sgl sgl;
+	struct list_head list;
+	size_t sg_num_bytes;
+};
+
+struct skcipher_async_req {
+	struct kiocb *iocb;
+	struct sock *sk;
+
+	struct skcipher_rsgl first_sgl;
+	struct list_head rsgl_list;
+
+	struct scatterlist *tsgl;
+	unsigned int tsgl_entries;
+
+	unsigned int areqlen;
+	struct skcipher_request req;
+};
+
 struct skcipher_tfm {
 	struct crypto_skcipher *skcipher;
 	bool has_key;
 };
 
 struct skcipher_ctx {
-	struct list_head tsgl;
-	struct af_alg_sgl rsgl;
+	struct list_head tsgl_list;
 
 	void *iv;
 
 	struct af_alg_completion completion;
 
-	atomic_t inflight;
 	size_t used;
+	size_t rcvused;
 
-	unsigned int len;
 	bool more;
 	bool merge;
 	bool enc;
 
-	struct skcipher_request req;
-};
-
-struct skcipher_async_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-};
-
-struct skcipher_async_req {
-	struct kiocb *iocb;
-	struct skcipher_async_rsgl first_sgl;
-	struct list_head list;
-	struct scatterlist *tsg;
-	atomic_t *inflight;
-	struct skcipher_request req;
+	unsigned int len;
 };
 
-#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
+#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_tsgl)) / \
 		      sizeof(struct scatterlist) - 1)
 
-static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
+static inline int skcipher_sndbuf(struct sock *sk)
 {
-	struct skcipher_async_rsgl *rsgl, *tmp;
-	struct scatterlist *sgl;
-	struct scatterlist *sg;
-	int i, n;
-
-	list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &sreq->first_sgl)
-			kfree(rsgl);
-	}
-	sgl = sreq->tsg;
-	n = sg_nents(sgl);
-	for_each_sg(sgl, sg, n, i)
-		put_page(sg_page(sg));
+	struct alg_sock *ask = alg_sk(sk);
+	struct skcipher_ctx *ctx = ask->private;
 
-	kfree(sreq->tsg);
+	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
+			  ctx->used, 0);
 }
 
-static void skcipher_async_cb(struct crypto_async_request *req, int err)
+static inline bool skcipher_writable(struct sock *sk)
 {
-	struct skcipher_async_req *sreq = req->data;
-	struct kiocb *iocb = sreq->iocb;
-
-	atomic_dec(sreq->inflight);
-	skcipher_free_async_sgls(sreq);
-	kzfree(sreq);
-	iocb->ki_complete(iocb, err, err);
+	return PAGE_SIZE <= skcipher_sndbuf(sk);
 }
 
-static inline int skcipher_sndbuf(struct sock *sk)
+static inline int skcipher_rcvbuf(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
 
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
+	return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
+			  ctx->rcvused, 0);
 }
 
-static inline bool skcipher_writable(struct sock *sk)
+static inline bool skcipher_readable(struct sock *sk)
 {
-	return PAGE_SIZE <= skcipher_sndbuf(sk);
+	return PAGE_SIZE <= skcipher_rcvbuf(sk);
 }
 
-static int skcipher_alloc_sgl(struct sock *sk)
+static int skcipher_alloc_tsgl(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
+	struct skcipher_tsgl *sgl;
 	struct scatterlist *sg = NULL;
 
-	sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-	if (!list_empty(&ctx->tsgl))
+	sgl = list_entry(ctx->tsgl_list.prev, struct skcipher_tsgl, list);
+	if (!list_empty(&ctx->tsgl_list))
 		sg = sgl->sg;
 
 	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
@@ -142,31 +142,66 @@ static int skcipher_alloc_sgl(struct sock *sk)
 		if (sg)
 			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
 
-		list_add_tail(&sgl->list, &ctx->tsgl);
+		list_add_tail(&sgl->list, &ctx->tsgl_list);
 	}
 
 	return 0;
 }
 
-static void skcipher_pull_sgl(struct sock *sk, size_t used, int put)
+static unsigned int skcipher_count_tsgl(struct sock *sk, size_t bytes)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct skcipher_ctx *ctx = ask->private;
+	struct skcipher_tsgl *sgl, *tmp;
+	unsigned int i;
+	unsigned int sgl_count = 0;
+
+	if (!bytes)
+		return 0;
+
+	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
+		struct scatterlist *sg = sgl->sg;
+
+		for (i = 0; i < sgl->cur; i++) {
+			sgl_count++;
+			if (sg[i].length >= bytes)
+				return sgl_count;
+
+			bytes -= sg[i].length;
+		}
+	}
+
+	return sgl_count;
+}
+
+static void skcipher_pull_tsgl(struct sock *sk, size_t used,
+			       struct scatterlist *dst)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
+	struct skcipher_tsgl *sgl;
 	struct scatterlist *sg;
-	int i;
+	unsigned int i;
 
-	while (!list_empty(&ctx->tsgl)) {
-		sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list,
+	while (!list_empty(&ctx->tsgl_list)) {
+		sgl = list_first_entry(&ctx->tsgl_list, struct skcipher_tsgl,
 				       list);
 		sg = sgl->sg;
 
 		for (i = 0; i < sgl->cur; i++) {
 			size_t plen = min_t(size_t, used, sg[i].length);
+			struct page *page = sg_page(sg + i);
 
-			if (!sg_page(sg + i))
+			if (!page)
 				continue;
 
+			/*
+			 * Assumption: caller created skcipher_count_tsgl(len)
+			 * SG entries in dst.
+			 */
+			if (dst)
+				sg_set_page(dst + i, page, plen, sg[i].offset);
+
 			sg[i].length -= plen;
 			sg[i].offset += plen;
 
@@ -175,27 +210,48 @@ static void skcipher_pull_sgl(struct sock *sk, size_t used, int put)
 
 			if (sg[i].length)
 				return;
-			if (put)
-				put_page(sg_page(sg + i));
+
+			if (!dst)
+				put_page(page);
 			sg_assign_page(sg + i, NULL);
 		}
 
 		list_del(&sgl->list);
-		sock_kfree_s(sk, sgl,
-			     sizeof(*sgl) + sizeof(sgl->sg[0]) *
-					    (MAX_SGL_ENTS + 1));
+		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
+						     (MAX_SGL_ENTS + 1));
 	}
 
 	if (!ctx->used)
 		ctx->merge = 0;
 }
 
-static void skcipher_free_sgl(struct sock *sk)
+static void skcipher_free_areq_sgls(struct skcipher_async_req *areq)
 {
+	struct sock *sk = areq->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
+	struct skcipher_rsgl *rsgl, *tmp;
+	struct scatterlist *tsgl;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
+		ctx->rcvused -= rsgl->sg_num_bytes;
+		af_alg_free_sg(&rsgl->sgl);
+		list_del(&rsgl->list);
+		if (rsgl != &areq->first_sgl)
+			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+	}
+
+	tsgl = areq->tsgl;
+	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+		if (!sg_page(sg))
+			continue;
+		put_page(sg_page(sg));
+	}
 
-	skcipher_pull_sgl(sk, ctx->used, 1);
+	if (areq->tsgl && areq->tsgl_entries)
+		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
 }
 
 static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
@@ -302,7 +358,7 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
-	struct skcipher_sg_list *sgl;
+	struct skcipher_tsgl *sgl;
 	struct af_alg_control con = {};
 	long copied = 0;
 	bool enc = 0;
@@ -349,8 +405,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 		size_t plen;
 
 		if (ctx->merge) {
-			sgl = list_entry(ctx->tsgl.prev,
-					 struct skcipher_sg_list, list);
+			sgl = list_entry(ctx->tsgl_list.prev,
+					 struct skcipher_tsgl, list);
 			sg = sgl->sg + sgl->cur - 1;
 			len = min_t(unsigned long, len,
 				    PAGE_SIZE - sg->offset - sg->length);
@@ -379,11 +435,12 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 
 		len = min_t(unsigned long, len, skcipher_sndbuf(sk));
 
-		err = skcipher_alloc_sgl(sk);
+		err = skcipher_alloc_tsgl(sk);
 		if (err)
 			goto unlock;
 
-		sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+		sgl = list_entry(ctx->tsgl_list.prev, struct skcipher_tsgl,
+				 list);
 		sg = sgl->sg;
 		if (sgl->cur)
 			sg_unmark_end(sg + sgl->cur - 1);
@@ -435,7 +492,7 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
+	struct skcipher_tsgl *sgl;
 	int err = -EINVAL;
 
 	if (flags & MSG_SENDPAGE_NOTLAST)
@@ -454,12 +511,12 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
 			goto unlock;
 	}
 
-	err = skcipher_alloc_sgl(sk);
+	err = skcipher_alloc_tsgl(sk);
 	if (err)
 		goto unlock;
 
 	ctx->merge = 0;
-	sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+	sgl = list_entry(ctx->tsgl_list.prev, struct skcipher_tsgl, list);
 
 	if (sgl->cur)
 		sg_unmark_end(sgl->sg + sgl->cur - 1);
@@ -480,25 +537,29 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
 	return err ?: size;
 }
 
-static int skcipher_all_sg_nents(struct skcipher_ctx *ctx)
+static void skcipher_async_cb(struct crypto_async_request *req, int err)
 {
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int nents = 0;
+	struct skcipher_async_req *areq = req->data;
+	struct sock *sk = areq->sk;
+	struct kiocb *iocb = areq->iocb;
+	unsigned int resultlen;
 
-	list_for_each_entry(sgl, &ctx->tsgl, list) {
-		sg = sgl->sg;
+	lock_sock(sk);
 
-		while (!sg->length)
-			sg++;
+	/* Buffer size written by crypto operation. */
+	resultlen = areq->req.cryptlen;
 
-		nents += sg_nents(sg);
-	}
-	return nents;
+	skcipher_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+	__sock_put(sk);
+
+	iocb->ki_complete(iocb, err ? err : resultlen, 0);
+
+	release_sock(sk);
 }
 
-static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
-				  int flags)
+static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			     size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
@@ -507,215 +568,166 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 	struct skcipher_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	struct skcipher_async_req *sreq;
-	struct skcipher_request *req;
-	struct skcipher_async_rsgl *last_rsgl = NULL;
-	unsigned int txbufs = 0, len = 0, tx_nents;
-	unsigned int reqsize = crypto_skcipher_reqsize(tfm);
-	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
-	int err = -ENOMEM;
-	bool mark = false;
-	char *iv;
-
-	sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
-	if (unlikely(!sreq))
-		goto out;
-
-	req = &sreq->req;
-	iv = (char *)(req + 1) + reqsize;
-	sreq->iocb = msg->msg_iocb;
-	INIT_LIST_HEAD(&sreq->list);
-	sreq->inflight = &ctx->inflight;
+	unsigned int bs = crypto_skcipher_blocksize(tfm);
+	unsigned int areqlen = sizeof(struct skcipher_async_req) +
+			       crypto_skcipher_reqsize(tfm);
+	struct skcipher_async_req *areq;
+	struct skcipher_rsgl *last_rsgl = NULL;
+	int err = 0;
+	size_t len = 0;
 
-	lock_sock(sk);
-	tx_nents = skcipher_all_sg_nents(ctx);
-	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-	if (unlikely(!sreq->tsg))
-		goto unlock;
-	sg_init_table(sreq->tsg, tx_nents);
-	memcpy(iv, ctx->iv, ivsize);
-	skcipher_request_set_tfm(req, tfm);
-	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
-				      skcipher_async_cb, sreq);
+	/* Allocate cipher request for current operation. */
+	areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
+	if (unlikely(!areq))
+		return -ENOMEM;
+	areq->areqlen = areqlen;
+	areq->sk = sk;
+	INIT_LIST_HEAD(&areq->rsgl_list);
+	areq->tsgl = NULL;
+	areq->tsgl_entries = 0;
 
-	while (iov_iter_count(&msg->msg_iter)) {
-		struct skcipher_async_rsgl *rsgl;
-		int used;
+	/* convert iovecs of output buffers into RX SGL */
+	while (msg_data_left(msg)) {
+		struct skcipher_rsgl *rsgl;
+		size_t seglen;
+
+		/* limit the amount of readable buffers */
+		if (!skcipher_readable(sk))
+			break;
 
 		if (!ctx->used) {
 			err = skcipher_wait_for_data(sk, flags);
 			if (err)
 				goto free;
 		}
-		sgl = list_first_entry(&ctx->tsgl,
-				       struct skcipher_sg_list, list);
-		sg = sgl->sg;
 
-		while (!sg->length)
-			sg++;
-
-		used = min_t(unsigned long, ctx->used,
-			     iov_iter_count(&msg->msg_iter));
-		used = min_t(unsigned long, used, sg->length);
-
-		if (txbufs == tx_nents) {
-			struct scatterlist *tmp;
-			int x;
-			/* Ran out of tx slots in async request
-			 * need to expand */
-			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
-				      GFP_KERNEL);
-			if (!tmp) {
-				err = -ENOMEM;
-				goto free;
-			}
+		seglen = min_t(size_t, ctx->used, msg_data_left(msg));
 
-			sg_init_table(tmp, tx_nents * 2);
-			for (x = 0; x < tx_nents; x++)
-				sg_set_page(&tmp[x], sg_page(&sreq->tsg[x]),
-					    sreq->tsg[x].length,
-					    sreq->tsg[x].offset);
-			kfree(sreq->tsg);
-			sreq->tsg = tmp;
-			tx_nents *= 2;
-			mark = true;
-		}
-		/* Need to take over the tx sgl from ctx
-		 * to the asynch req - these sgls will be freed later */
-		sg_set_page(sreq->tsg + txbufs++, sg_page(sg), sg->length,
-			    sg->offset);
-
-		if (list_empty(&sreq->list)) {
-			rsgl = &sreq->first_sgl;
-			list_add_tail(&rsgl->list, &sreq->list);
+		if (list_empty(&areq->rsgl_list)) {
+			rsgl = &areq->first_sgl;
 		} else {
-			rsgl = kmalloc(sizeof(*rsgl), GFP_KERNEL);
+			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
 			if (!rsgl) {
 				err = -ENOMEM;
 				goto free;
 			}
-			list_add_tail(&rsgl->list, &sreq->list);
 		}
 
-		used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used);
-		err = used;
-		if (used < 0)
+		rsgl->sgl.npages = 0;
+		list_add_tail(&rsgl->list, &areq->rsgl_list);
+
+		/* make one iovec available as scatterlist */
+		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+		if (err < 0)
 			goto free;
+
+		/* chain the new scatterlist with previous one */
 		if (last_rsgl)
 			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
 
 		last_rsgl = rsgl;
-		len += used;
-		skcipher_pull_sgl(sk, used, 0);
-		iov_iter_advance(&msg->msg_iter, used);
+		len += err;
+		ctx->rcvused += err;
+		rsgl->sg_num_bytes = err;
+		iov_iter_advance(&msg->msg_iter, err);
 	}
 
-	if (mark)
-		sg_mark_end(sreq->tsg + txbufs - 1);
+	/* Process only as much RX buffers for which we have TX data */
+	if (len > ctx->used)
+		len = ctx->used;
+
+	/*
+	 * If more buffers are to be expected to be processed, process only
+	 * full block size buffers.
+	 */
+	if (ctx->more || len < ctx->used)
+		len -= len % bs;
+
+	/*
+	 * Create a per request TX SGL for this request which tracks the
+	 * SG entries from the global TX SGL.
+	 */
+	areq->tsgl_entries = skcipher_count_tsgl(sk, len);
+	if (!areq->tsgl_entries)
+		areq->tsgl_entries = 1;
+	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
+				  GFP_KERNEL);
+	if (!areq->tsgl) {
+		err = -ENOMEM;
+		goto free;
+	}
+	sg_init_table(areq->tsgl, areq->tsgl_entries);
+	skcipher_pull_tsgl(sk, len, areq->tsgl);
+
+	/* Initialize the crypto operation */
+	skcipher_request_set_tfm(&areq->req, tfm);
+	skcipher_request_set_crypt(&areq->req, areq->tsgl,
+				   areq->first_sgl.sgl.sg, len, ctx->iv);
+
+	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+		/* AIO operation */
+		areq->iocb = msg->msg_iocb;
+		skcipher_request_set_callback(&areq->req,
+					      CRYPTO_TFM_REQ_MAY_SLEEP,
+					      skcipher_async_cb, areq);
+		err = ctx->enc ? crypto_skcipher_encrypt(&areq->req) :
+				 crypto_skcipher_decrypt(&areq->req);
+	} else {
+		/* Synchronous operation */
+		skcipher_request_set_callback(&areq->req,
+					      CRYPTO_TFM_REQ_MAY_SLEEP |
+					      CRYPTO_TFM_REQ_MAY_BACKLOG,
+					      af_alg_complete,
+					      &ctx->completion);
+		err = af_alg_wait_for_completion(ctx->enc ?
+					crypto_skcipher_encrypt(&areq->req) :
+					crypto_skcipher_decrypt(&areq->req),
+						 &ctx->completion);
+	}
 
-	skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-				   len, iv);
-	err = ctx->enc ? crypto_skcipher_encrypt(req) :
-			 crypto_skcipher_decrypt(req);
+	/* AIO operation in progress */
 	if (err == -EINPROGRESS) {
-		atomic_inc(&ctx->inflight);
-		err = -EIOCBQUEUED;
-		sreq = NULL;
-		goto unlock;
+		sock_hold(sk);
+		return -EIOCBQUEUED;
 	}
+
 free:
-	skcipher_free_async_sgls(sreq);
-unlock:
-	skcipher_wmem_wakeup(sk);
-	release_sock(sk);
-	kzfree(sreq);
-out:
-	return err;
+	skcipher_free_areq_sgls(areq);
+	if (areq)
+		sock_kfree_s(sk, areq, areqlen);
+
+	return err ? err : len;
 }
 
-static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
-				 int flags)
+static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct sock *psk = ask->parent;
-	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
-	unsigned bs = crypto_skcipher_blocksize(tfm);
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int err = -EAGAIN;
-	int used;
-	long copied = 0;
+	int ret = 0;
 
 	lock_sock(sk);
 	while (msg_data_left(msg)) {
-		if (!ctx->used) {
-			err = skcipher_wait_for_data(sk, flags);
-			if (err)
-				goto unlock;
+		int err = _skcipher_recvmsg(sock, msg, ignored, flags);
+
+		/*
+		 * This error covers -EIOCBQUEUED which implies that we can
+		 * only handle one AIO request. If the caller wants to have
+		 * multiple AIO requests in parallel, he must make multiple
+		 * separate AIO calls.
+		 */
+		if (err <= 0) {
+			if (err == -EIOCBQUEUED)
+				ret = err;
+			goto out;
 		}
 
-		used = min_t(unsigned long, ctx->used, msg_data_left(msg));
-
-		used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
-		err = used;
-		if (err < 0)
-			goto unlock;
-
-		if (ctx->more || used < ctx->used)
-			used -= used % bs;
-
-		err = -EINVAL;
-		if (!used)
-			goto free;
-
-		sgl = list_first_entry(&ctx->tsgl,
-				       struct skcipher_sg_list, list);
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
-					   ctx->iv);
-
-		err = af_alg_wait_for_completion(
-				ctx->enc ?
-					crypto_skcipher_encrypt(&ctx->req) :
-					crypto_skcipher_decrypt(&ctx->req),
-				&ctx->completion);
-
-free:
-		af_alg_free_sg(&ctx->rsgl);
-
-		if (err)
-			goto unlock;
-
-		copied += used;
-		skcipher_pull_sgl(sk, used, 1);
-		iov_iter_advance(&msg->msg_iter, used);
+		ret += err;
 	}
 
-	err = 0;
-
-unlock:
+out:
 	skcipher_wmem_wakeup(sk);
 	release_sock(sk);
-
-	return copied ?: err;
-}
-
-static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
-			    size_t ignored, int flags)
-{
-	return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-		skcipher_recvmsg_async(sock, msg, flags) :
-		skcipher_recvmsg_sync(sock, msg, flags);
+	return ret;
 }
 
 static unsigned int skcipher_poll(struct file *file, struct socket *sock,
@@ -895,26 +907,16 @@ static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 	return err;
 }
 
-static void skcipher_wait(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	int ctr = 0;
-
-	while (atomic_read(&ctx->inflight) && ctr++ < 100)
-		msleep(100);
-}
-
 static void skcipher_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
-
-	if (atomic_read(&ctx->inflight))
-		skcipher_wait(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct skcipher_tfm *skc = pask->private;
+	struct crypto_skcipher *tfm = skc->skcipher;
 
-	skcipher_free_sgl(sk);
+	skcipher_pull_tsgl(sk, ctx->used, NULL);
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -926,7 +928,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_tfm *tfm = private;
 	struct crypto_skcipher *skcipher = tfm->skcipher;
-	unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
+	unsigned int len = sizeof(*ctx);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
@@ -941,22 +943,17 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 
 	memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
-	INIT_LIST_HEAD(&ctx->tsgl);
+	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
+	ctx->rcvused = 0;
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
-	atomic_set(&ctx->inflight, 0);
 	af_alg_init_completion(&ctx->completion);
 
 	ask->private = ctx;
 
-	skcipher_request_set_tfm(&ctx->req, skcipher);
-	skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
-						 CRYPTO_TFM_REQ_MAY_BACKLOG,
-				      af_alg_complete, &ctx->completion);
-
 	sk->sk_destruct = skcipher_sock_destruct;
 
 	return 0;

From d887c52d6ae43aeebd249b5f2f1333e60236aa60 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Sun, 25 Jun 2017 17:12:59 +0200
Subject: [PATCH 040/116] crypto: algif_aead - overhaul memory management

The updated memory management is described in the top part of the code.
As one benefit of the changed memory management, the AIO and synchronous
operation is now implemented in one common function. The AF_ALG
operation uses the async kernel crypto API interface for each cipher
operation. Thus, the only difference between the AIO and sync operation
types visible from user space is:

1. the callback function to be invoked when the asynchronous operation
   is completed

2. whether to wait for the completion of the kernel crypto API operation
   or not

The change includes the overhaul of the TX and RX SGL handling. The TX
SGL holding the data sent from user space to the kernel is now dynamic
similar to algif_skcipher. This dynamic nature allows a continuous
operation of a thread sending data and a second thread receiving the
data. These threads do not need to synchronize as the kernel processes
as much data from the TX SGL to fill the RX SGL.

The caller reading the data from the kernel defines the amount of data
to be processed. Considering that the interface covers AEAD
authenticating ciphers, the reader must provide the buffer in the
correct size. Thus the reader defines the encryption size.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algif_aead.c | 766 +++++++++++++++++++++++++-------------------
 1 file changed, 442 insertions(+), 324 deletions(-)

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index be117495eb43b..9755aac0fe26f 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -5,12 +5,26 @@
  *
  * This file provides the user-space API for AEAD ciphers.
  *
- * This file is derived from algif_skcipher.c.
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
+ *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request is released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
 
 #include <crypto/internal/aead.h>
@@ -25,24 +39,32 @@
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct aead_sg_list {
-	unsigned int cur;
-	struct scatterlist sg[ALG_MAX_PAGES];
+struct aead_tsgl {
+	struct list_head list;
+	unsigned int cur;		/* Last processed SG entry */
+	struct scatterlist sg[0];	/* Array of SGs forming the SGL */
 };
 
-struct aead_async_rsgl {
+struct aead_rsgl {
 	struct af_alg_sgl sgl;
 	struct list_head list;
+	size_t sg_num_bytes;		/* Bytes of data in that SGL */
 };
 
 struct aead_async_req {
-	struct scatterlist *tsgl;
-	struct aead_async_rsgl first_rsgl;
-	struct list_head list;
 	struct kiocb *iocb;
 	struct sock *sk;
-	unsigned int tsgls;
-	char iv[];
+
+	struct aead_rsgl first_rsgl;	/* First RX SG */
+	struct list_head rsgl_list;	/* Track RX SGs */
+
+	struct scatterlist *tsgl;	/* priv. TX SGL of buffers to process */
+	unsigned int tsgl_entries;	/* number of entries in priv. TX SGL */
+
+	unsigned int outlen;		/* Filled output buf length */
+
+	unsigned int areqlen;		/* Length of this data struct */
+	struct aead_request aead_req;	/* req ctx trails this struct */
 };
 
 struct aead_tfm {
@@ -51,25 +73,26 @@ struct aead_tfm {
 };
 
 struct aead_ctx {
-	struct aead_sg_list tsgl;
-	struct aead_async_rsgl first_rsgl;
-	struct list_head list;
+	struct list_head tsgl_list;	/* Link to TX SGL */
 
 	void *iv;
+	size_t aead_assoclen;
 
-	struct af_alg_completion completion;
+	struct af_alg_completion completion;	/* sync work queue */
 
-	unsigned long used;
+	size_t used;		/* TX bytes sent to kernel */
+	size_t rcvused;		/* total RX bytes to be processed by kernel */
 
-	unsigned int len;
-	bool more;
-	bool merge;
-	bool enc;
+	bool more;		/* More data to be expected? */
+	bool merge;		/* Merge new data into existing SG */
+	bool enc;		/* Crypto operation: enc, dec */
 
-	size_t aead_assoclen;
-	struct aead_request aead_req;
+	unsigned int len;	/* Length of allocated memory for this struct */
 };
 
+#define MAX_SGL_ENTS ((4096 - sizeof(struct aead_tsgl)) / \
+		      sizeof(struct scatterlist) - 1)
+
 static inline int aead_sndbuf(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
@@ -84,9 +107,29 @@ static inline bool aead_writable(struct sock *sk)
 	return PAGE_SIZE <= aead_sndbuf(sk);
 }
 
-static inline bool aead_sufficient_data(struct aead_ctx *ctx)
+static inline int aead_rcvbuf(struct sock *sk)
 {
-	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+
+	return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
+			  ctx->rcvused, 0);
+}
+
+static inline bool aead_readable(struct sock *sk)
+{
+	return PAGE_SIZE <= aead_rcvbuf(sk);
+}
+
+static inline bool aead_sufficient_data(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct aead_ctx *ctx = ask->private;
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int as = crypto_aead_authsize(tfm);
 
 	/*
 	 * The minimum amount of memory needed for an AEAD cipher is
@@ -95,33 +138,166 @@ static inline bool aead_sufficient_data(struct aead_ctx *ctx)
 	return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as);
 }
 
-static void aead_reset_ctx(struct aead_ctx *ctx)
+static int aead_alloc_tsgl(struct sock *sk)
 {
-	struct aead_sg_list *sgl = &ctx->tsgl;
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	struct aead_tsgl *sgl;
+	struct scatterlist *sg = NULL;
 
-	sg_init_table(sgl->sg, ALG_MAX_PAGES);
-	sgl->cur = 0;
-	ctx->used = 0;
-	ctx->more = 0;
-	ctx->merge = 0;
+	sgl = list_entry(ctx->tsgl_list.prev, struct aead_tsgl, list);
+	if (!list_empty(&ctx->tsgl_list))
+		sg = sgl->sg;
+
+	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
+		sgl = sock_kmalloc(sk, sizeof(*sgl) +
+				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+				   GFP_KERNEL);
+		if (!sgl)
+			return -ENOMEM;
+
+		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
+		sgl->cur = 0;
+
+		if (sg)
+			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+
+		list_add_tail(&sgl->list, &ctx->tsgl_list);
+	}
+
+	return 0;
+}
+
+static unsigned int aead_count_tsgl(struct sock *sk, size_t bytes)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	struct aead_tsgl *sgl, *tmp;
+	unsigned int i;
+	unsigned int sgl_count = 0;
+
+	if (!bytes)
+		return 0;
+
+	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
+		struct scatterlist *sg = sgl->sg;
+
+		for (i = 0; i < sgl->cur; i++) {
+			sgl_count++;
+			if (sg[i].length >= bytes)
+				return sgl_count;
+
+			bytes -= sg[i].length;
+		}
+	}
+
+	return sgl_count;
 }
 
-static void aead_put_sgl(struct sock *sk)
+static void aead_pull_tsgl(struct sock *sk, size_t used,
+			   struct scatterlist *dst)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_ctx *ctx = ask->private;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct scatterlist *sg = sgl->sg;
+	struct aead_tsgl *sgl;
+	struct scatterlist *sg;
 	unsigned int i;
 
-	for (i = 0; i < sgl->cur; i++) {
-		if (!sg_page(sg + i))
+	while (!list_empty(&ctx->tsgl_list)) {
+		sgl = list_first_entry(&ctx->tsgl_list, struct aead_tsgl,
+				       list);
+		sg = sgl->sg;
+
+		for (i = 0; i < sgl->cur; i++) {
+			size_t plen = min_t(size_t, used, sg[i].length);
+			struct page *page = sg_page(sg + i);
+
+			if (!page)
+				continue;
+
+			/*
+			 * Assumption: caller created aead_count_tsgl(len)
+			 * SG entries in dst.
+			 */
+			if (dst)
+				sg_set_page(dst + i, page, plen, sg[i].offset);
+
+			sg[i].length -= plen;
+			sg[i].offset += plen;
+
+			used -= plen;
+			ctx->used -= plen;
+
+			if (sg[i].length)
+				return;
+
+			if (!dst)
+				put_page(page);
+			sg_assign_page(sg + i, NULL);
+		}
+
+		list_del(&sgl->list);
+		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
+						     (MAX_SGL_ENTS + 1));
+	}
+
+	if (!ctx->used)
+		ctx->merge = 0;
+}
+
+static void aead_free_areq_sgls(struct aead_async_req *areq)
+{
+	struct sock *sk = areq->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	struct aead_rsgl *rsgl, *tmp;
+	struct scatterlist *tsgl;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
+		ctx->rcvused -= rsgl->sg_num_bytes;
+		af_alg_free_sg(&rsgl->sgl);
+		list_del(&rsgl->list);
+		if (rsgl != &areq->first_rsgl)
+			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+	}
+
+	tsgl = areq->tsgl;
+	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+		if (!sg_page(sg))
 			continue;
+		put_page(sg_page(sg));
+	}
+
+	if (areq->tsgl && areq->tsgl_entries)
+		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+}
+
+static int aead_wait_for_wmem(struct sock *sk, unsigned int flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int err = -ERESTARTSYS;
+	long timeout;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
 
-		put_page(sg_page(sg + i));
-		sg_assign_page(sg + i, NULL);
+	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, aead_writable(sk), &wait)) {
+			err = 0;
+			break;
+		}
 	}
-	aead_reset_ctx(ctx);
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	return err;
 }
 
 static void aead_wmem_wakeup(struct sock *sk)
@@ -153,6 +329,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags)
 		return -EAGAIN;
 
 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
 	add_wait_queue(sk_sleep(sk), &wait);
 	for (;;) {
 		if (signal_pending(current))
@@ -176,8 +353,6 @@ static void aead_data_wakeup(struct sock *sk)
 	struct aead_ctx *ctx = ask->private;
 	struct socket_wq *wq;
 
-	if (ctx->more)
-		return;
 	if (!ctx->used)
 		return;
 
@@ -195,15 +370,18 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
 	struct aead_ctx *ctx = ask->private;
-	unsigned ivsize =
-		crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
-	struct aead_sg_list *sgl = &ctx->tsgl;
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int ivsize = crypto_aead_ivsize(tfm);
+	struct aead_tsgl *sgl;
 	struct af_alg_control con = {};
 	long copied = 0;
 	bool enc = 0;
 	bool init = 0;
-	int err = -EINVAL;
+	int err = 0;
 
 	if (msg->msg_controllen) {
 		err = af_alg_cmsg_send(msg, &con);
@@ -227,8 +405,10 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	}
 
 	lock_sock(sk);
-	if (!ctx->more && ctx->used)
+	if (!ctx->more && ctx->used) {
+		err = -EINVAL;
 		goto unlock;
+	}
 
 	if (init) {
 		ctx->enc = enc;
@@ -239,11 +419,14 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	}
 
 	while (size) {
+		struct scatterlist *sg;
 		size_t len = size;
-		struct scatterlist *sg = NULL;
+		size_t plen;
 
 		/* use the existing memory in an allocated page */
 		if (ctx->merge) {
+			sgl = list_entry(ctx->tsgl_list.prev,
+					 struct aead_tsgl, list);
 			sg = sgl->sg + sgl->cur - 1;
 			len = min_t(unsigned long, len,
 				    PAGE_SIZE - sg->offset - sg->length);
@@ -264,57 +447,60 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		}
 
 		if (!aead_writable(sk)) {
-			/* user space sent too much data */
-			aead_put_sgl(sk);
-			err = -EMSGSIZE;
-			goto unlock;
+			err = aead_wait_for_wmem(sk, msg->msg_flags);
+			if (err)
+				goto unlock;
 		}
 
 		/* allocate a new page */
 		len = min_t(unsigned long, size, aead_sndbuf(sk));
-		while (len) {
-			size_t plen = 0;
 
-			if (sgl->cur >= ALG_MAX_PAGES) {
-				aead_put_sgl(sk);
-				err = -E2BIG;
-				goto unlock;
-			}
+		err = aead_alloc_tsgl(sk);
+		if (err)
+			goto unlock;
+
+		sgl = list_entry(ctx->tsgl_list.prev, struct aead_tsgl,
+				 list);
+		sg = sgl->sg;
+		if (sgl->cur)
+			sg_unmark_end(sg + sgl->cur - 1);
+
+		do {
+			unsigned int i = sgl->cur;
 
-			sg = sgl->sg + sgl->cur;
 			plen = min_t(size_t, len, PAGE_SIZE);
 
-			sg_assign_page(sg, alloc_page(GFP_KERNEL));
-			err = -ENOMEM;
-			if (!sg_page(sg))
+			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
+			if (!sg_page(sg + i)) {
+				err = -ENOMEM;
 				goto unlock;
+			}
 
-			err = memcpy_from_msg(page_address(sg_page(sg)),
+			err = memcpy_from_msg(page_address(sg_page(sg + i)),
 					      msg, plen);
 			if (err) {
-				__free_page(sg_page(sg));
-				sg_assign_page(sg, NULL);
+				__free_page(sg_page(sg + i));
+				sg_assign_page(sg + i, NULL);
 				goto unlock;
 			}
 
-			sg->offset = 0;
-			sg->length = plen;
+			sg[i].length = plen;
 			len -= plen;
 			ctx->used += plen;
 			copied += plen;
-			sgl->cur++;
 			size -= plen;
-			ctx->merge = plen & (PAGE_SIZE - 1);
-		}
+			sgl->cur++;
+		} while (len && sgl->cur < MAX_SGL_ENTS);
+
+		if (!size)
+			sg_mark_end(sg + sgl->cur - 1);
+
+		ctx->merge = plen & (PAGE_SIZE - 1);
 	}
 
 	err = 0;
 
 	ctx->more = msg->msg_flags & MSG_MORE;
-	if (!ctx->more && !aead_sufficient_data(ctx)) {
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-	}
 
 unlock:
 	aead_data_wakeup(sk);
@@ -329,15 +515,12 @@ static ssize_t aead_sendpage(struct socket *sock, struct page *page,
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_ctx *ctx = ask->private;
-	struct aead_sg_list *sgl = &ctx->tsgl;
+	struct aead_tsgl *sgl;
 	int err = -EINVAL;
 
 	if (flags & MSG_SENDPAGE_NOTLAST)
 		flags |= MSG_MORE;
 
-	if (sgl->cur >= ALG_MAX_PAGES)
-		return -E2BIG;
-
 	lock_sock(sk);
 	if (!ctx->more && ctx->used)
 		goto unlock;
@@ -346,13 +529,22 @@ static ssize_t aead_sendpage(struct socket *sock, struct page *page,
 		goto done;
 
 	if (!aead_writable(sk)) {
-		/* user space sent too much data */
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-		goto unlock;
+		err = aead_wait_for_wmem(sk, flags);
+		if (err)
+			goto unlock;
 	}
 
+	err = aead_alloc_tsgl(sk);
+	if (err)
+		goto unlock;
+
 	ctx->merge = 0;
+	sgl = list_entry(ctx->tsgl_list.prev, struct aead_tsgl, list);
+
+	if (sgl->cur)
+		sg_unmark_end(sgl->sg + sgl->cur - 1);
+
+	sg_mark_end(sgl->sg + sgl->cur);
 
 	get_page(page);
 	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
@@ -363,11 +555,6 @@ static ssize_t aead_sendpage(struct socket *sock, struct page *page,
 
 done:
 	ctx->more = flags & MSG_MORE;
-	if (!ctx->more && !aead_sufficient_data(ctx)) {
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-	}
-
 unlock:
 	aead_data_wakeup(sk);
 	release_sock(sk);
@@ -375,204 +562,52 @@ static ssize_t aead_sendpage(struct socket *sock, struct page *page,
 	return err ?: size;
 }
 
-#define GET_ASYM_REQ(req, tfm) (struct aead_async_req *) \
-		((char *)req + sizeof(struct aead_request) + \
-		 crypto_aead_reqsize(tfm))
-
- #define GET_REQ_SIZE(tfm) sizeof(struct aead_async_req) + \
-	crypto_aead_reqsize(tfm) + crypto_aead_ivsize(tfm) + \
-	sizeof(struct aead_request)
-
 static void aead_async_cb(struct crypto_async_request *_req, int err)
 {
-	struct aead_request *req = _req->data;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aead_async_req *areq = GET_ASYM_REQ(req, tfm);
+	struct aead_async_req *areq = _req->data;
 	struct sock *sk = areq->sk;
-	struct scatterlist *sg = areq->tsgl;
-	struct aead_async_rsgl *rsgl;
 	struct kiocb *iocb = areq->iocb;
-	unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-
-	list_for_each_entry(rsgl, &areq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
-
-	for (i = 0; i < areq->tsgls; i++)
-		put_page(sg_page(sg + i));
-
-	sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-	sock_kfree_s(sk, req, reqlen);
-	__sock_put(sk);
-	iocb->ki_complete(iocb, err, err);
-}
-
-static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
-			      int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req);
-	struct aead_async_req *areq;
-	struct aead_request *req = NULL;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct aead_async_rsgl *last_rsgl = NULL, *rsgl;
-	unsigned int as = crypto_aead_authsize(tfm);
-	unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-	int err = -ENOMEM;
-	unsigned long used;
-	size_t outlen = 0;
-	size_t usedpages = 0;
+	unsigned int resultlen;
 
 	lock_sock(sk);
-	if (ctx->more) {
-		err = aead_wait_for_data(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	if (!aead_sufficient_data(ctx))
-		goto unlock;
-
-	used = ctx->used;
-	if (ctx->enc)
-		outlen = used + as;
-	else
-		outlen = used - as;
-
-	req = sock_kmalloc(sk, reqlen, GFP_KERNEL);
-	if (unlikely(!req))
-		goto unlock;
-
-	areq = GET_ASYM_REQ(req, tfm);
-	memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl));
-	INIT_LIST_HEAD(&areq->list);
-	areq->iocb = msg->msg_iocb;
-	areq->sk = sk;
-	memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm));
-	aead_request_set_tfm(req, tfm);
-	aead_request_set_ad(req, ctx->aead_assoclen);
-	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  aead_async_cb, req);
-	used -= ctx->aead_assoclen;
-
-	/* take over all tx sgls from ctx */
-	areq->tsgl = sock_kmalloc(sk,
-				  sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
-				  GFP_KERNEL);
-	if (unlikely(!areq->tsgl))
-		goto free;
-
-	sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
-	for (i = 0; i < sgl->cur; i++)
-		sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
-			    sgl->sg[i].length, sgl->sg[i].offset);
-
-	areq->tsgls = sgl->cur;
-
-	/* create rx sgls */
-	while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-		size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-				      (outlen - usedpages));
-
-		if (list_empty(&areq->list)) {
-			rsgl = &areq->first_rsgl;
-
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (unlikely(!rsgl)) {
-				err = -ENOMEM;
-				goto free;
-			}
-		}
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &areq->list);
-
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto free;
-
-		usedpages += err;
-
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
 
-		iov_iter_advance(&msg->msg_iter, err);
-	}
+	/* Buffer size written by crypto operation. */
+	resultlen = areq->outlen;
 
-	/* ensure output buffer is sufficiently large */
-	if (usedpages < outlen) {
-		err = -EINVAL;
-		goto unlock;
-	}
+	aead_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+	__sock_put(sk);
 
-	aead_request_set_crypt(req, areq->tsgl, areq->first_rsgl.sgl.sg, used,
-			       areq->iv);
-	err = ctx->enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
-	if (err) {
-		if (err == -EINPROGRESS) {
-			sock_hold(sk);
-			err = -EIOCBQUEUED;
-			aead_reset_ctx(ctx);
-			goto unlock;
-		} else if (err == -EBADMSG) {
-			aead_put_sgl(sk);
-		}
-		goto free;
-	}
-	aead_put_sgl(sk);
+	iocb->ki_complete(iocb, err ? err : resultlen, 0);
 
-free:
-	list_for_each_entry(rsgl, &areq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
-	if (areq->tsgl)
-		sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-	if (req)
-		sock_kfree_s(sk, req, reqlen);
-unlock:
-	aead_wmem_wakeup(sk);
 	release_sock(sk);
-	return err ? err : outlen;
 }
 
-static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
+static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
+			 size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
 	struct aead_ctx *ctx = ask->private;
-	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct aead_async_rsgl *last_rsgl = NULL;
-	struct aead_async_rsgl *rsgl, *tmp;
-	int err = -EINVAL;
-	unsigned long used = 0;
-	size_t outlen = 0;
-	size_t usedpages = 0;
-
-	lock_sock(sk);
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int as = crypto_aead_authsize(tfm);
+	unsigned int areqlen =
+		sizeof(struct aead_async_req) + crypto_aead_reqsize(tfm);
+	struct aead_async_req *areq;
+	struct aead_rsgl *last_rsgl = NULL;
+	int err = 0;
+	size_t used = 0;		/* [in]  TX bufs to be en/decrypted */
+	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
+	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
+	size_t processed = 0;		/* [in]  TX bufs to be consumed */
 
 	/*
-	 * Please see documentation of aead_request_set_crypt for the
-	 * description of the AEAD memory structure expected from the caller.
+	 * Data length provided by caller via sendmsg/sendpage that has not
+	 * yet been processed.
 	 */
-
-	if (ctx->more) {
-		err = aead_wait_for_data(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	/* data length provided by caller via sendmsg/sendpage */
 	used = ctx->used;
 
 	/*
@@ -584,8 +619,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 	 * the error message in sendmsg/sendpage and still call recvmsg. This
 	 * check here protects the kernel integrity.
 	 */
-	if (!aead_sufficient_data(ctx))
-		goto unlock;
+	if (!aead_sufficient_data(sk))
+		return -EINVAL;
 
 	/*
 	 * Calculate the minimum output buffer size holding the result of the
@@ -606,84 +641,170 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 	 */
 	used -= ctx->aead_assoclen;
 
-	/* convert iovecs of output buffers into scatterlists */
-	while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-		size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-				      (outlen - usedpages));
+	/* Allocate cipher request for current operation. */
+	areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
+	if (unlikely(!areq))
+		return -ENOMEM;
+	areq->areqlen = areqlen;
+	areq->sk = sk;
+	INIT_LIST_HEAD(&areq->rsgl_list);
+	areq->tsgl = NULL;
+	areq->tsgl_entries = 0;
+
+	/* convert iovecs of output buffers into RX SGL */
+	while (outlen > usedpages && msg_data_left(msg)) {
+		struct aead_rsgl *rsgl;
+		size_t seglen;
+
+		/* limit the amount of readable buffers */
+		if (!aead_readable(sk))
+			break;
 
-		if (list_empty(&ctx->list)) {
-			rsgl = &ctx->first_rsgl;
+		if (!ctx->used) {
+			err = aead_wait_for_data(sk, flags);
+			if (err)
+				goto free;
+		}
+
+		seglen = min_t(size_t, (outlen - usedpages),
+			       msg_data_left(msg));
+
+		if (list_empty(&areq->rsgl_list)) {
+			rsgl = &areq->first_rsgl;
 		} else {
 			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
 			if (unlikely(!rsgl)) {
 				err = -ENOMEM;
-				goto unlock;
+				goto free;
 			}
 		}
+
 		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &ctx->list);
+		list_add_tail(&rsgl->list, &areq->rsgl_list);
 
 		/* make one iovec available as scatterlist */
 		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
 		if (err < 0)
-			goto unlock;
-		usedpages += err;
+			goto free;
+
 		/* chain the new scatterlist with previous one */
 		if (last_rsgl)
 			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
 
 		last_rsgl = rsgl;
-
+		usedpages += err;
+		ctx->rcvused += err;
+		rsgl->sg_num_bytes = err;
 		iov_iter_advance(&msg->msg_iter, err);
 	}
 
-	/* ensure output buffer is sufficiently large */
+	/*
+	 * Ensure output buffer is sufficiently large. If the caller provides
+	 * less buffer space, only use the relative required input size. This
+	 * allows AIO operation where the caller sent all data to be processed
+	 * and the AIO operation performs the operation on the different chunks
+	 * of the input data.
+	 */
 	if (usedpages < outlen) {
-		err = -EINVAL;
-		goto unlock;
-	}
+		size_t less = outlen - usedpages;
 
-	sg_mark_end(sgl->sg + sgl->cur - 1);
-	aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->first_rsgl.sgl.sg,
-			       used, ctx->iv);
-	aead_request_set_ad(&ctx->aead_req, ctx->aead_assoclen);
+		if (used < less) {
+			err = -EINVAL;
+			goto free;
+		}
+		used -= less;
+		outlen -= less;
+	}
 
-	err = af_alg_wait_for_completion(ctx->enc ?
-					 crypto_aead_encrypt(&ctx->aead_req) :
-					 crypto_aead_decrypt(&ctx->aead_req),
+	/*
+	 * Create a per request TX SGL for this request which tracks the
+	 * SG entries from the global TX SGL.
+	 */
+	processed = used + ctx->aead_assoclen;
+	areq->tsgl_entries = aead_count_tsgl(sk, processed);
+	if (!areq->tsgl_entries)
+		areq->tsgl_entries = 1;
+	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
+				  GFP_KERNEL);
+	if (!areq->tsgl) {
+		err = -ENOMEM;
+		goto free;
+	}
+	sg_init_table(areq->tsgl, areq->tsgl_entries);
+	aead_pull_tsgl(sk, processed, areq->tsgl);
+
+	/* Initialize the crypto operation */
+	aead_request_set_crypt(&areq->aead_req, areq->tsgl,
+			       areq->first_rsgl.sgl.sg, used, ctx->iv);
+	aead_request_set_ad(&areq->aead_req, ctx->aead_assoclen);
+	aead_request_set_tfm(&areq->aead_req, tfm);
+
+	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+		/* AIO operation */
+		areq->iocb = msg->msg_iocb;
+		aead_request_set_callback(&areq->aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  aead_async_cb, areq);
+		err = ctx->enc ? crypto_aead_encrypt(&areq->aead_req) :
+				 crypto_aead_decrypt(&areq->aead_req);
+	} else {
+		/* Synchronous operation */
+		aead_request_set_callback(&areq->aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  af_alg_complete, &ctx->completion);
+		err = af_alg_wait_for_completion(ctx->enc ?
+					 crypto_aead_encrypt(&areq->aead_req) :
+					 crypto_aead_decrypt(&areq->aead_req),
 					 &ctx->completion);
-
-	if (err) {
-		/* EBADMSG implies a valid cipher operation took place */
-		if (err == -EBADMSG)
-			aead_put_sgl(sk);
-
-		goto unlock;
 	}
 
-	aead_put_sgl(sk);
-	err = 0;
+	/* AIO operation in progress */
+	if (err == -EINPROGRESS) {
+		sock_hold(sk);
 
-unlock:
-	list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		list_del(&rsgl->list);
-		if (rsgl != &ctx->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+		/* Remember output size that will be generated. */
+		areq->outlen = outlen;
+
+		return -EIOCBQUEUED;
 	}
-	INIT_LIST_HEAD(&ctx->list);
-	aead_wmem_wakeup(sk);
-	release_sock(sk);
+
+free:
+	aead_free_areq_sgls(areq);
+	if (areq)
+		sock_kfree_s(sk, areq, areqlen);
 
 	return err ? err : outlen;
 }
 
-static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
-			int flags)
+static int aead_recvmsg(struct socket *sock, struct msghdr *msg,
+			size_t ignored, int flags)
 {
-	return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-		aead_recvmsg_async(sock, msg, flags) :
-		aead_recvmsg_sync(sock, msg, flags);
+	struct sock *sk = sock->sk;
+	int ret = 0;
+
+	lock_sock(sk);
+	while (msg_data_left(msg)) {
+		int err = _aead_recvmsg(sock, msg, ignored, flags);
+
+		/*
+		 * This error covers -EIOCBQUEUED which implies that we can
+		 * only handle one AIO request. If the caller wants to have
+		 * multiple AIO requests in parallel, he must make multiple
+		 * separate AIO calls.
+		 */
+		if (err <= 0) {
+			if (err == -EIOCBQUEUED || err == -EBADMSG)
+				ret = err;
+			goto out;
+		}
+
+		ret += err;
+	}
+
+out:
+	aead_wmem_wakeup(sk);
+	release_sock(sk);
+	return ret;
 }
 
 static unsigned int aead_poll(struct file *file, struct socket *sock,
@@ -874,11 +995,13 @@ static void aead_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_ctx *ctx = ask->private;
-	unsigned int ivlen = crypto_aead_ivsize(
-				crypto_aead_reqtfm(&ctx->aead_req));
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int ivlen = crypto_aead_ivsize(tfm);
 
-	WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
-	aead_put_sgl(sk);
+	aead_pull_tsgl(sk, ctx->used, NULL);
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -890,7 +1013,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_tfm *tfm = private;
 	struct crypto_aead *aead = tfm->aead;
-	unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(aead);
+	unsigned int len = sizeof(*ctx);
 	unsigned int ivlen = crypto_aead_ivsize(aead);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
@@ -905,23 +1028,18 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
 	}
 	memset(ctx->iv, 0, ivlen);
 
+	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
+	ctx->rcvused = 0;
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
-	ctx->tsgl.cur = 0;
 	ctx->aead_assoclen = 0;
 	af_alg_init_completion(&ctx->completion);
-	sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
-	INIT_LIST_HEAD(&ctx->list);
 
 	ask->private = ctx;
 
-	aead_request_set_tfm(&ctx->aead_req, aead);
-	aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  af_alg_complete, &ctx->completion);
-
 	sk->sk_destruct = aead_sock_destruct;
 
 	return 0;

From 430f13389bdafa6fd9ce2999fed01dca8a5d79ae Mon Sep 17 00:00:00 2001
From: Xulin Sun <xulin.sun@windriver.com>
Date: Thu, 13 Jul 2017 05:21:01 -0400
Subject: [PATCH 041/116] crypto: caam - free qman_fq after kill_fq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kill_fq removes a complete frame queue, it needs to free the qman_fq
in the last. Else kmemleak will report the below warning:

unreferenced object 0xffff800073085c80 (size 128):
  comm "cryptomgr_test", pid 199, jiffies 4294937850 (age 67.840s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 a0 80 7e 00 00 80 ff ff
    00 00 00 00 00 00 00 00 04 00 04 00 5c 01 00 00
  backtrace:
    [<ffff8000001e5760>] create_object+0xf8/0x258
    [<ffff800000994e38>] kmemleak_alloc+0x58/0xa0
    [<ffff8000001d5f18>] kmem_cache_alloc_trace+0x2c8/0x358
    [<ffff8000007e8410>] create_caam_req_fq+0x40/0x170
    [<ffff8000007e870c>] caam_drv_ctx_update+0x54/0x248
    [<ffff8000007fca54>] aead_setkey+0x154/0x300
    [<ffff800000452120>] setkey+0x50/0xf0
    [<ffff80000045b144>] __test_aead+0x5ec/0x1028
    [<ffff80000045c28c>] test_aead+0x44/0xc8
    [<ffff80000045c368>] alg_test_aead+0x58/0xd0
    [<ffff80000045bdb4>] alg_test+0x14c/0x308
    [<ffff8000004588e8>] cryptomgr_test+0x50/0x58
    [<ffff8000000c3b2c>] kthread+0xdc/0xf0
    [<ffff800000083c00>] ret_from_fork+0x10/0x50

And check where the function kill_fq() is called to remove
the additional kfree to qman_fq and avoid re-calling the released qman_fq.

Signed-off-by: Xulin Sun <xulin.sun@windriver.com>
Acked-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/qi.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 6d5a010a1881b..9a4151ac5e37c 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -275,6 +275,7 @@ static int kill_fq(struct device *qidev, struct qman_fq *fq)
 		dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid);
 
 	qman_destroy_fq(fq);
+	kfree(fq);
 
 	return ret;
 }
@@ -340,8 +341,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 
 		return ret;
 	}
@@ -371,10 +371,9 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 	} else if (kill_fq(qidev, old_fq)) {
-		dev_warn(qidev, "Old CAAM FQ: %u kill failed\n", old_fq->fqid);
+		dev_warn(qidev, "Old CAAM FQ kill failed\n");
 	}
 
 	return 0;
@@ -508,7 +507,6 @@ int caam_qi_shutdown(struct device *qidev)
 
 		if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
 			dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
-		kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
 	}
 
 	/*

From 391775191f38f807efebc1c3e58c79785d8dd526 Mon Sep 17 00:00:00 2001
From: "lionel.debieve@st.com" <lionel.debieve@st.com>
Date: Thu, 13 Jul 2017 15:06:31 +0200
Subject: [PATCH 042/116] crypto: stm32 - CRC use relaxed function

In case of arm soc support, readl and writel will
be optimized using relaxed functions

Signed-off-by: Lionel Debieve <lionel.debieve@st.com>
Reviewed-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32_crc32.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c
index ec83b1e6bfe8d..04d2f4ba310bb 100644
--- a/drivers/crypto/stm32/stm32_crc32.c
+++ b/drivers/crypto/stm32/stm32_crc32.c
@@ -107,12 +107,12 @@ static int stm32_crc_init(struct shash_desc *desc)
 	spin_unlock_bh(&crc_list.lock);
 
 	/* Reset, set key, poly and configure in bit reverse mode */
-	writel(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT);
-	writel(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL);
-	writel(CRC_CR_RESET | CRC_CR_REVERSE, ctx->crc->regs + CRC_CR);
+	writel_relaxed(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT);
+	writel_relaxed(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL);
+	writel_relaxed(CRC_CR_RESET | CRC_CR_REVERSE, ctx->crc->regs + CRC_CR);
 
 	/* Store partial result */
-	ctx->partial = readl(ctx->crc->regs + CRC_DR);
+	ctx->partial = readl_relaxed(ctx->crc->regs + CRC_DR);
 	ctx->crc->nb_pending_bytes = 0;
 
 	return 0;
@@ -135,7 +135,8 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 
 		if (crc->nb_pending_bytes == sizeof(u32)) {
 			/* Process completed pending data */
-			writel(*(u32 *)crc->pending_data, crc->regs + CRC_DR);
+			writel_relaxed(*(u32 *)crc->pending_data,
+				       crc->regs + CRC_DR);
 			crc->nb_pending_bytes = 0;
 		}
 	}
@@ -143,10 +144,10 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 	d32 = (u32 *)d8;
 	for (i = 0; i < length >> 2; i++)
 		/* Process 32 bits data */
-		writel(*(d32++), crc->regs + CRC_DR);
+		writel_relaxed(*(d32++), crc->regs + CRC_DR);
 
 	/* Store partial result */
-	ctx->partial = readl(crc->regs + CRC_DR);
+	ctx->partial = readl_relaxed(crc->regs + CRC_DR);
 
 	/* Check for pending data (non 32 bits) */
 	length &= 3;

From 0373d085870ab036146002d1ba3128855e26c953 Mon Sep 17 00:00:00 2001
From: "lionel.debieve@st.com" <lionel.debieve@st.com>
Date: Thu, 13 Jul 2017 15:06:32 +0200
Subject: [PATCH 043/116] crypto: stm32 - solve crc issue during unbind

Use the correct unregister_shashes function to
to remove the registered algo

Signed-off-by: Lionel Debieve <lionel.debieve@st.com>
Reviewed-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32_crc32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c
index 04d2f4ba310bb..090582baecfe8 100644
--- a/drivers/crypto/stm32/stm32_crc32.c
+++ b/drivers/crypto/stm32/stm32_crc32.c
@@ -296,7 +296,7 @@ static int stm32_crc_remove(struct platform_device *pdev)
 	list_del(&crc->list);
 	spin_unlock(&crc_list.lock);
 
-	crypto_unregister_shash(algs);
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 
 	clk_disable_unprepare(crc->clk);
 

From c35af01d939a865b666591ddc653d082258a1135 Mon Sep 17 00:00:00 2001
From: "lionel.debieve@st.com" <lionel.debieve@st.com>
Date: Thu, 13 Jul 2017 15:06:33 +0200
Subject: [PATCH 044/116] crypto: stm32 - Rename module to use generic crypto

The complete stm32 module is rename as crypto
in order to use generic naming

Signed-off-by: Lionel Debieve <lionel.debieve@st.com>
Reviewed-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Makefile       | 2 +-
 drivers/crypto/stm32/Kconfig  | 6 +++---
 drivers/crypto/stm32/Makefile | 3 +--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index de629165fde7d..b12eb3c994302 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
-obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32/
+obj-$(CONFIG_ARCH_STM32) += stm32/
 obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 09b4ec87c2124..7dd14f8d294ef 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -1,7 +1,7 @@
-config CRYPTO_DEV_STM32
-	tristate "Support for STM32 crypto accelerators"
+config CRC_DEV_STM32
+	tristate "Support for STM32 crc accelerators"
 	depends on ARCH_STM32
 	select CRYPTO_HASH
 	help
           This enables support for the CRC32 hw accelerator which can be found
-	  on STMicroelectronis STM32 SOC.
+	  on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile
index 73b4c6e47f5fe..4db2f283068d7 100644
--- a/drivers/crypto/stm32/Makefile
+++ b/drivers/crypto/stm32/Makefile
@@ -1,2 +1 @@
-obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32_cryp.o
-stm32_cryp-objs := stm32_crc32.o
+obj-$(CONFIG_CRC_DEV_STM32) += stm32_crc32.o

From 7bb952cdfae25f4d9d4dd8c4812c302f57aac4b6 Mon Sep 17 00:00:00 2001
From: "lionel.debieve@st.com" <lionel.debieve@st.com>
Date: Thu, 13 Jul 2017 15:32:26 +0200
Subject: [PATCH 045/116] dt-bindings: Document STM32 HASH bindings

This adds documentation of device tree bindings for the STM32
HASH controller.

Signed-off-by: Lionel Debieve <lionel.debieve@st.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../bindings/crypto/st,stm32-hash.txt         | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/crypto/st,stm32-hash.txt

diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
new file mode 100644
index 0000000000000..04fc246f02f79
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
@@ -0,0 +1,30 @@
+* STMicroelectronics STM32 HASH
+
+Required properties:
+- compatible: Should contain entries for this and backward compatible
+  HASH versions:
+  - "st,stm32f456-hash" for stm32 F456.
+  - "st,stm32f756-hash" for stm32 F756.
+- reg: The address and length of the peripheral registers space
+- interrupts: the interrupt specifier for the HASH
+- clocks: The input clock of the HASH instance
+
+Optional properties:
+- resets: The input reset of the HASH instance
+- dmas: DMA specifiers for the HASH. See the DMA client binding,
+	 Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request name. Should be "in" if a dma is present.
+- dma-maxburst: Set number of maximum dma burst supported
+
+Example:
+
+hash1: hash@50060400 {
+	compatible = "st,stm32f756-hash";
+	reg = <0x50060400 0x400>;
+	interrupts = <80>;
+	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>;
+	resets = <&rcc STM32F7_AHB2_RESET(HASH)>;
+	dmas = <&dma2 7 2 0x400 0x0>;
+	dma-names = "in";
+	dma-maxburst = <0>;
+};

From 8a1012d3f2abcef43470f879dbfd72651818e059 Mon Sep 17 00:00:00 2001
From: "lionel.debieve@st.com" <lionel.debieve@st.com>
Date: Thu, 13 Jul 2017 15:32:27 +0200
Subject: [PATCH 046/116] crypto: stm32 - Support for STM32 HASH module

This module register a HASH module that support multiples
algorithms: MD5, SHA1, SHA224, SHA256.

It includes the support of HMAC hardware processing corresponding
to the supported algorithms. DMA or IRQ mode are used depending
on data length.

Signed-off-by: Lionel Debieve <lionel.debieve@st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/Kconfig      |   13 +
 drivers/crypto/stm32/Makefile     |    1 +
 drivers/crypto/stm32/stm32-hash.c | 1575 +++++++++++++++++++++++++++++
 3 files changed, 1589 insertions(+)
 create mode 100644 drivers/crypto/stm32/stm32-hash.c

diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 7dd14f8d294ef..602332e027296 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -5,3 +5,16 @@ config CRC_DEV_STM32
 	help
           This enables support for the CRC32 hw accelerator which can be found
 	  on STMicroelectronics STM32 SOC.
+
+config HASH_DEV_STM32
+	tristate "Support for STM32 hash accelerators"
+	depends on ARCH_STM32
+	depends on HAS_DMA
+	select CRYPTO_HASH
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_ENGINE
+	help
+          This enables support for the HASH hw accelerator which can be found
+	  on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile
index 4db2f283068d7..73cd56cad0cc1 100644
--- a/drivers/crypto/stm32/Makefile
+++ b/drivers/crypto/stm32/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_CRC_DEV_STM32) += stm32_crc32.o
+obj-$(CONFIG_HASH_DEV_STM32) += stm32-hash.o
\ No newline at end of file
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
new file mode 100644
index 0000000000000..b585ce54a8028
--- /dev/null
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -0,0 +1,1575 @@
+/*
+ * This file is part of STM32 Crypto driver for Linux.
+ *
+ * Copyright (C) 2017, STMicroelectronics - All Rights Reserved
+ * Author(s): Lionel DEBIEVE <lionel.debieve@st.com> for STMicroelectronics.
+ *
+ * License terms: GPL V2.0.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/md5.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/internal/hash.h>
+
+#define HASH_CR				0x00
+#define HASH_DIN			0x04
+#define HASH_STR			0x08
+#define HASH_IMR			0x20
+#define HASH_SR				0x24
+#define HASH_CSR(x)			(0x0F8 + ((x) * 0x04))
+#define HASH_HREG(x)			(0x310 + ((x) * 0x04))
+#define HASH_HWCFGR			0x3F0
+#define HASH_VER			0x3F4
+#define HASH_ID				0x3F8
+
+/* Control Register */
+#define HASH_CR_INIT			BIT(2)
+#define HASH_CR_DMAE			BIT(3)
+#define HASH_CR_DATATYPE_POS		4
+#define HASH_CR_MODE			BIT(6)
+#define HASH_CR_MDMAT			BIT(13)
+#define HASH_CR_DMAA			BIT(14)
+#define HASH_CR_LKEY			BIT(16)
+
+#define HASH_CR_ALGO_SHA1		0x0
+#define HASH_CR_ALGO_MD5		0x80
+#define HASH_CR_ALGO_SHA224		0x40000
+#define HASH_CR_ALGO_SHA256		0x40080
+
+/* Interrupt */
+#define HASH_DINIE			BIT(0)
+#define HASH_DCIE			BIT(1)
+
+/* Interrupt Mask */
+#define HASH_MASK_CALC_COMPLETION	BIT(0)
+#define HASH_MASK_DATA_INPUT		BIT(1)
+
+/* Context swap register */
+#define HASH_CSR_REGISTER_NUMBER	53
+
+/* Status Flags */
+#define HASH_SR_DATA_INPUT_READY	BIT(0)
+#define HASH_SR_OUTPUT_READY		BIT(1)
+#define HASH_SR_DMA_ACTIVE		BIT(2)
+#define HASH_SR_BUSY			BIT(3)
+
+/* STR Register */
+#define HASH_STR_NBLW_MASK		GENMASK(4, 0)
+#define HASH_STR_DCAL			BIT(8)
+
+#define HASH_FLAGS_INIT			BIT(0)
+#define HASH_FLAGS_OUTPUT_READY		BIT(1)
+#define HASH_FLAGS_CPU			BIT(2)
+#define HASH_FLAGS_DMA_READY		BIT(3)
+#define HASH_FLAGS_DMA_ACTIVE		BIT(4)
+#define HASH_FLAGS_HMAC_INIT		BIT(5)
+#define HASH_FLAGS_HMAC_FINAL		BIT(6)
+#define HASH_FLAGS_HMAC_KEY		BIT(7)
+
+#define HASH_FLAGS_FINAL		BIT(15)
+#define HASH_FLAGS_FINUP		BIT(16)
+#define HASH_FLAGS_ALGO_MASK		GENMASK(21, 18)
+#define HASH_FLAGS_MD5			BIT(18)
+#define HASH_FLAGS_SHA1			BIT(19)
+#define HASH_FLAGS_SHA224		BIT(20)
+#define HASH_FLAGS_SHA256		BIT(21)
+#define HASH_FLAGS_ERRORS		BIT(22)
+#define HASH_FLAGS_HMAC			BIT(23)
+
+#define HASH_OP_UPDATE			1
+#define HASH_OP_FINAL			2
+
+enum stm32_hash_data_format {
+	HASH_DATA_32_BITS		= 0x0,
+	HASH_DATA_16_BITS		= 0x1,
+	HASH_DATA_8_BITS		= 0x2,
+	HASH_DATA_1_BIT			= 0x3
+};
+
+#define HASH_BUFLEN			256
+#define HASH_LONG_KEY			64
+#define HASH_MAX_KEY_SIZE		(SHA256_BLOCK_SIZE * 8)
+#define HASH_QUEUE_LENGTH		16
+#define HASH_DMA_THRESHOLD		50
+
+struct stm32_hash_ctx {
+	struct stm32_hash_dev	*hdev;
+	unsigned long		flags;
+
+	u8			key[HASH_MAX_KEY_SIZE];
+	int			keylen;
+};
+
+struct stm32_hash_request_ctx {
+	struct stm32_hash_dev	*hdev;
+	unsigned long		flags;
+	unsigned long		op;
+
+	u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	size_t			digcnt;
+	size_t			bufcnt;
+	size_t			buflen;
+
+	/* DMA */
+	struct scatterlist	*sg;
+	unsigned int		offset;
+	unsigned int		total;
+	struct scatterlist	sg_key;
+
+	dma_addr_t		dma_addr;
+	size_t			dma_ct;
+	int			nents;
+
+	u8			data_type;
+
+	u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32));
+
+	/* Export Context */
+	u32			*hw_context;
+};
+
+struct stm32_hash_algs_info {
+	struct ahash_alg	*algs_list;
+	size_t			size;
+};
+
+struct stm32_hash_pdata {
+	struct stm32_hash_algs_info	*algs_info;
+	size_t				algs_info_size;
+};
+
+struct stm32_hash_dev {
+	struct list_head	list;
+	struct device		*dev;
+	struct clk		*clk;
+	struct reset_control	*rst;
+	void __iomem		*io_base;
+	phys_addr_t		phys_base;
+	u32			dma_mode;
+	u32			dma_maxburst;
+
+	spinlock_t		lock; /* lock to protect queue */
+
+	struct ahash_request	*req;
+	struct crypto_engine	*engine;
+
+	int			err;
+	unsigned long		flags;
+
+	struct dma_chan		*dma_lch;
+	struct completion	dma_completion;
+
+	const struct stm32_hash_pdata	*pdata;
+};
+
+struct stm32_hash_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock; /* List protection access */
+};
+
+static struct stm32_hash_drv stm32_hash = {
+	.dev_list = LIST_HEAD_INIT(stm32_hash.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(stm32_hash.lock),
+};
+
+static void stm32_hash_dma_callback(void *param);
+
+static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset)
+{
+	return readl_relaxed(hdev->io_base + offset);
+}
+
+static inline void stm32_hash_write(struct stm32_hash_dev *hdev,
+				    u32 offset, u32 value)
+{
+	writel_relaxed(value, hdev->io_base + offset);
+}
+
+static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(hdev->io_base + HASH_SR, status,
+				   !(status & HASH_SR_BUSY), 10, 10000);
+}
+
+static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length)
+{
+	u32 reg;
+
+	reg = stm32_hash_read(hdev, HASH_STR);
+	reg &= ~(HASH_STR_NBLW_MASK);
+	reg |= (8U * ((length) % 4U));
+	stm32_hash_write(hdev, HASH_STR, reg);
+}
+
+static int stm32_hash_write_key(struct stm32_hash_dev *hdev)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 reg;
+	int keylen = ctx->keylen;
+	void *key = ctx->key;
+
+	if (keylen) {
+		stm32_hash_set_nblw(hdev, keylen);
+
+		while (keylen > 0) {
+			stm32_hash_write(hdev, HASH_DIN, *(u32 *)key);
+			keylen -= 4;
+			key += 4;
+		}
+
+		reg = stm32_hash_read(hdev, HASH_STR);
+		reg |= HASH_STR_DCAL;
+		stm32_hash_write(hdev, HASH_STR, reg);
+
+		return -EINPROGRESS;
+	}
+
+	return 0;
+}
+
+static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	u32 reg = HASH_CR_INIT;
+
+	if (!(hdev->flags & HASH_FLAGS_INIT)) {
+		switch (rctx->flags & HASH_FLAGS_ALGO_MASK) {
+		case HASH_FLAGS_MD5:
+			reg |= HASH_CR_ALGO_MD5;
+			break;
+		case HASH_FLAGS_SHA1:
+			reg |= HASH_CR_ALGO_SHA1;
+			break;
+		case HASH_FLAGS_SHA224:
+			reg |= HASH_CR_ALGO_SHA224;
+			break;
+		case HASH_FLAGS_SHA256:
+			reg |= HASH_CR_ALGO_SHA256;
+			break;
+		default:
+			reg |= HASH_CR_ALGO_MD5;
+		}
+
+		reg |= (rctx->data_type << HASH_CR_DATATYPE_POS);
+
+		if (rctx->flags & HASH_FLAGS_HMAC) {
+			hdev->flags |= HASH_FLAGS_HMAC;
+			reg |= HASH_CR_MODE;
+			if (ctx->keylen > HASH_LONG_KEY)
+				reg |= HASH_CR_LKEY;
+		}
+
+		stm32_hash_write(hdev, HASH_IMR, HASH_DCIE);
+
+		stm32_hash_write(hdev, HASH_CR, reg);
+
+		hdev->flags |= HASH_FLAGS_INIT;
+
+		dev_dbg(hdev->dev, "Write Control %x\n", reg);
+	}
+}
+
+static void stm32_hash_append_sg(struct stm32_hash_request_ctx *rctx)
+{
+	size_t count;
+
+	while ((rctx->bufcnt < rctx->buflen) && rctx->total) {
+		count = min(rctx->sg->length - rctx->offset, rctx->total);
+		count = min(count, rctx->buflen - rctx->bufcnt);
+
+		if (count <= 0) {
+			if ((rctx->sg->length == 0) && !sg_is_last(rctx->sg)) {
+				rctx->sg = sg_next(rctx->sg);
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		scatterwalk_map_and_copy(rctx->buffer + rctx->bufcnt, rctx->sg,
+					 rctx->offset, count, 0);
+
+		rctx->bufcnt += count;
+		rctx->offset += count;
+		rctx->total -= count;
+
+		if (rctx->offset == rctx->sg->length) {
+			rctx->sg = sg_next(rctx->sg);
+			if (rctx->sg)
+				rctx->offset = 0;
+			else
+				rctx->total = 0;
+		}
+	}
+}
+
+static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev,
+			       const u8 *buf, size_t length, int final)
+{
+	unsigned int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+	u32 reg;
+
+	if (final)
+		hdev->flags |= HASH_FLAGS_FINAL;
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	dev_dbg(hdev->dev, "%s: length: %d, final: %x len32 %i\n",
+		__func__, length, final, len32);
+
+	hdev->flags |= HASH_FLAGS_CPU;
+
+	stm32_hash_write_ctrl(hdev);
+
+	if (stm32_hash_wait_busy(hdev))
+		return -ETIMEDOUT;
+
+	if ((hdev->flags & HASH_FLAGS_HMAC) &&
+	    (hdev->flags & ~HASH_FLAGS_HMAC_KEY)) {
+		hdev->flags |= HASH_FLAGS_HMAC_KEY;
+		stm32_hash_write_key(hdev);
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+	}
+
+	for (count = 0; count < len32; count++)
+		stm32_hash_write(hdev, HASH_DIN, buffer[count]);
+
+	if (final) {
+		stm32_hash_set_nblw(hdev, length);
+		reg = stm32_hash_read(hdev, HASH_STR);
+		reg |= HASH_STR_DCAL;
+		stm32_hash_write(hdev, HASH_STR, reg);
+		if (hdev->flags & HASH_FLAGS_HMAC) {
+			if (stm32_hash_wait_busy(hdev))
+				return -ETIMEDOUT;
+			stm32_hash_write_key(hdev);
+		}
+		return -EINPROGRESS;
+	}
+
+	return 0;
+}
+
+static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
+	int bufcnt, err = 0, final;
+
+	dev_dbg(hdev->dev, "%s flags %lx\n", __func__, rctx->flags);
+
+	final = (rctx->flags & HASH_FLAGS_FINUP);
+
+	while ((rctx->total >= rctx->buflen) ||
+	       (rctx->bufcnt + rctx->total >= rctx->buflen)) {
+		stm32_hash_append_sg(rctx);
+		bufcnt = rctx->bufcnt;
+		rctx->bufcnt = 0;
+		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt, 0);
+	}
+
+	stm32_hash_append_sg(rctx);
+
+	if (final) {
+		bufcnt = rctx->bufcnt;
+		rctx->bufcnt = 0;
+		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt,
+					  (rctx->flags & HASH_FLAGS_FINUP));
+	}
+
+	return err;
+}
+
+static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
+			       struct scatterlist *sg, int length, int mdma)
+{
+	struct dma_async_tx_descriptor *in_desc;
+	dma_cookie_t cookie;
+	u32 reg;
+	int err;
+
+	in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1,
+					  DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT |
+					  DMA_CTRL_ACK);
+	if (!in_desc) {
+		dev_err(hdev->dev, "dmaengine_prep_slave error\n");
+		return -ENOMEM;
+	}
+
+	reinit_completion(&hdev->dma_completion);
+	in_desc->callback = stm32_hash_dma_callback;
+	in_desc->callback_param = hdev;
+
+	hdev->flags |= HASH_FLAGS_FINAL;
+	hdev->flags |= HASH_FLAGS_DMA_ACTIVE;
+
+	reg = stm32_hash_read(hdev, HASH_CR);
+
+	if (mdma)
+		reg |= HASH_CR_MDMAT;
+	else
+		reg &= ~HASH_CR_MDMAT;
+
+	reg |= HASH_CR_DMAE;
+
+	stm32_hash_write(hdev, HASH_CR, reg);
+
+	stm32_hash_set_nblw(hdev, length);
+
+	cookie = dmaengine_submit(in_desc);
+	err = dma_submit_error(cookie);
+	if (err)
+		return -ENOMEM;
+
+	dma_async_issue_pending(hdev->dma_lch);
+
+	if (!wait_for_completion_interruptible_timeout(&hdev->dma_completion,
+						       msecs_to_jiffies(100)))
+		err = -ETIMEDOUT;
+
+	if (dma_async_is_tx_complete(hdev->dma_lch, cookie,
+				     NULL, NULL) != DMA_COMPLETE)
+		err = -ETIMEDOUT;
+
+	if (err) {
+		dev_err(hdev->dev, "DMA Error %i\n", err);
+		dmaengine_terminate_all(hdev->dma_lch);
+		return err;
+	}
+
+	return -EINPROGRESS;
+}
+
+static void stm32_hash_dma_callback(void *param)
+{
+	struct stm32_hash_dev *hdev = param;
+
+	complete(&hdev->dma_completion);
+
+	hdev->flags |= HASH_FLAGS_DMA_READY;
+}
+
+static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	int err;
+
+	if (ctx->keylen < HASH_DMA_THRESHOLD || (hdev->dma_mode == 1)) {
+		err = stm32_hash_write_key(hdev);
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+	} else {
+		if (!(hdev->flags & HASH_FLAGS_HMAC_KEY))
+			sg_init_one(&rctx->sg_key, ctx->key,
+				    ALIGN(ctx->keylen, sizeof(u32)));
+
+		rctx->dma_ct = dma_map_sg(hdev->dev, &rctx->sg_key, 1,
+					  DMA_TO_DEVICE);
+		if (rctx->dma_ct == 0) {
+			dev_err(hdev->dev, "dma_map_sg error\n");
+			return -ENOMEM;
+		}
+
+		err = stm32_hash_xmit_dma(hdev, &rctx->sg_key, ctx->keylen, 0);
+
+		dma_unmap_sg(hdev->dev, &rctx->sg_key, 1, DMA_TO_DEVICE);
+	}
+
+	return err;
+}
+
+static int stm32_hash_dma_init(struct stm32_hash_dev *hdev)
+{
+	struct dma_slave_config dma_conf;
+	int err;
+
+	memset(&dma_conf, 0, sizeof(dma_conf));
+
+	dma_conf.direction = DMA_MEM_TO_DEV;
+	dma_conf.dst_addr = hdev->phys_base + HASH_DIN;
+	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	dma_conf.src_maxburst = hdev->dma_maxburst;
+	dma_conf.dst_maxburst = hdev->dma_maxburst;
+	dma_conf.device_fc = false;
+
+	hdev->dma_lch = dma_request_slave_channel(hdev->dev, "in");
+	if (!hdev->dma_lch) {
+		dev_err(hdev->dev, "Couldn't acquire a slave DMA channel.\n");
+		return -EBUSY;
+	}
+
+	err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
+	if (err) {
+		dma_release_channel(hdev->dma_lch);
+		hdev->dma_lch = NULL;
+		dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
+		return err;
+	}
+
+	init_completion(&hdev->dma_completion);
+
+	return 0;
+}
+
+static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
+	struct scatterlist sg[1], *tsg;
+	int err = 0, len = 0, reg, ncp;
+	unsigned int i;
+	const u32 *buffer = (const u32 *)rctx->buffer;
+
+	rctx->sg = hdev->req->src;
+	rctx->total = hdev->req->nbytes;
+
+	rctx->nents = sg_nents(rctx->sg);
+
+	if (rctx->nents < 0)
+		return -EINVAL;
+
+	stm32_hash_write_ctrl(hdev);
+
+	if (hdev->flags & HASH_FLAGS_HMAC) {
+		err = stm32_hash_hmac_dma_send(hdev);
+		if (err != -EINPROGRESS)
+			return err;
+	}
+
+	for_each_sg(rctx->sg, tsg, rctx->nents, i) {
+		len = sg->length;
+
+		sg[0] = *tsg;
+		if (sg_is_last(sg)) {
+			if (hdev->dma_mode == 1) {
+				len = (ALIGN(sg->length, 16) - 16);
+
+				ncp = sg_pcopy_to_buffer(
+					rctx->sg, rctx->nents,
+					rctx->buffer, sg->length - len,
+					rctx->total - sg->length + len);
+
+				sg->length = len;
+			} else {
+				if (!(IS_ALIGNED(sg->length, sizeof(u32)))) {
+					len = sg->length;
+					sg->length = ALIGN(sg->length,
+							   sizeof(u32));
+				}
+			}
+		}
+
+		rctx->dma_ct = dma_map_sg(hdev->dev, sg, 1,
+					  DMA_TO_DEVICE);
+		if (rctx->dma_ct == 0) {
+			dev_err(hdev->dev, "dma_map_sg error\n");
+			return -ENOMEM;
+		}
+
+		err = stm32_hash_xmit_dma(hdev, sg, len,
+					  !sg_is_last(sg));
+
+		dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE);
+
+		if (err == -ENOMEM)
+			return err;
+	}
+
+	if (hdev->dma_mode == 1) {
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+		reg = stm32_hash_read(hdev, HASH_CR);
+		reg &= ~HASH_CR_DMAE;
+		reg |= HASH_CR_DMAA;
+		stm32_hash_write(hdev, HASH_CR, reg);
+
+		for (i = 0; i < DIV_ROUND_UP(ncp, sizeof(u32)); i++)
+			stm32_hash_write(hdev, HASH_DIN, buffer[i]);
+
+		stm32_hash_set_nblw(hdev, ncp);
+		reg = stm32_hash_read(hdev, HASH_STR);
+		reg |= HASH_STR_DCAL;
+		stm32_hash_write(hdev, HASH_STR, reg);
+		err = -EINPROGRESS;
+	}
+
+	if (hdev->flags & HASH_FLAGS_HMAC) {
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+		err = stm32_hash_hmac_dma_send(hdev);
+	}
+
+	return err;
+}
+
+static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx)
+{
+	struct stm32_hash_dev *hdev = NULL, *tmp;
+
+	spin_lock_bh(&stm32_hash.lock);
+	if (!ctx->hdev) {
+		list_for_each_entry(tmp, &stm32_hash.dev_list, list) {
+			hdev = tmp;
+			break;
+		}
+		ctx->hdev = hdev;
+	} else {
+		hdev = ctx->hdev;
+	}
+
+	spin_unlock_bh(&stm32_hash.lock);
+
+	return hdev;
+}
+
+static bool stm32_hash_dma_aligned_data(struct ahash_request *req)
+{
+	struct scatterlist *sg;
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	int i;
+
+	if (req->nbytes <= HASH_DMA_THRESHOLD)
+		return false;
+
+	if (sg_nents(req->src) > 1) {
+		if (hdev->dma_mode == 1)
+			return false;
+		for_each_sg(req->src, sg, sg_nents(req->src), i) {
+			if ((!IS_ALIGNED(sg->length, sizeof(u32))) &&
+			    (!sg_is_last(sg)))
+				return false;
+		}
+	}
+
+	if (req->src->offset % 4)
+		return false;
+
+	return true;
+}
+
+static int stm32_hash_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+
+	rctx->hdev = hdev;
+
+	rctx->flags = HASH_FLAGS_CPU;
+
+	rctx->digcnt = crypto_ahash_digestsize(tfm);
+	switch (rctx->digcnt) {
+	case MD5_DIGEST_SIZE:
+		rctx->flags |= HASH_FLAGS_MD5;
+		break;
+	case SHA1_DIGEST_SIZE:
+		rctx->flags |= HASH_FLAGS_SHA1;
+		break;
+	case SHA224_DIGEST_SIZE:
+		rctx->flags |= HASH_FLAGS_SHA224;
+		break;
+	case SHA256_DIGEST_SIZE:
+		rctx->flags |= HASH_FLAGS_SHA256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rctx->bufcnt = 0;
+	rctx->buflen = HASH_BUFLEN;
+	rctx->total = 0;
+	rctx->offset = 0;
+	rctx->data_type = HASH_DATA_8_BITS;
+
+	memset(rctx->buffer, 0, HASH_BUFLEN);
+
+	if (ctx->flags & HASH_FLAGS_HMAC)
+		rctx->flags |= HASH_FLAGS_HMAC;
+
+	dev_dbg(hdev->dev, "%s Flags %lx\n", __func__, rctx->flags);
+
+	return 0;
+}
+
+static int stm32_hash_update_req(struct stm32_hash_dev *hdev)
+{
+	return stm32_hash_update_cpu(hdev);
+}
+
+static int stm32_hash_final_req(struct stm32_hash_dev *hdev)
+{
+	struct ahash_request *req = hdev->req;
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	int err;
+
+	if (!(rctx->flags & HASH_FLAGS_CPU))
+		err = stm32_hash_dma_send(hdev);
+	else
+		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, rctx->bufcnt, 1);
+
+	rctx->bufcnt = 0;
+
+	return err;
+}
+
+static void stm32_hash_copy_hash(struct ahash_request *req)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)rctx->digest;
+	unsigned int i, hashsize;
+
+	switch (rctx->flags & HASH_FLAGS_ALGO_MASK) {
+	case HASH_FLAGS_MD5:
+		hashsize = MD5_DIGEST_SIZE;
+		break;
+	case HASH_FLAGS_SHA1:
+		hashsize = SHA1_DIGEST_SIZE;
+		break;
+	case HASH_FLAGS_SHA224:
+		hashsize = SHA224_DIGEST_SIZE;
+		break;
+	case HASH_FLAGS_SHA256:
+		hashsize = SHA256_DIGEST_SIZE;
+		break;
+	default:
+		return;
+	}
+
+	for (i = 0; i < hashsize / sizeof(u32); i++)
+		hash[i] = be32_to_cpu(stm32_hash_read(rctx->hdev,
+						      HASH_HREG(i)));
+}
+
+static int stm32_hash_finish(struct ahash_request *req)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return -EINVAL;
+
+	memcpy(req->result, rctx->digest, rctx->digcnt);
+
+	return 0;
+}
+
+static void stm32_hash_finish_req(struct ahash_request *req, int err)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_dev *hdev = rctx->hdev;
+
+	if (!err && (HASH_FLAGS_FINAL & hdev->flags)) {
+		stm32_hash_copy_hash(req);
+		err = stm32_hash_finish(req);
+		hdev->flags &= ~(HASH_FLAGS_FINAL | HASH_FLAGS_CPU |
+				 HASH_FLAGS_INIT | HASH_FLAGS_DMA_READY |
+				 HASH_FLAGS_OUTPUT_READY | HASH_FLAGS_HMAC |
+				 HASH_FLAGS_HMAC_INIT | HASH_FLAGS_HMAC_FINAL |
+				 HASH_FLAGS_HMAC_KEY);
+	} else {
+		rctx->flags |= HASH_FLAGS_ERRORS;
+	}
+
+	crypto_finalize_hash_request(hdev->engine, req, err);
+}
+
+static int stm32_hash_hw_init(struct stm32_hash_dev *hdev,
+			      struct stm32_hash_request_ctx *rctx)
+{
+	if (!(HASH_FLAGS_INIT & hdev->flags)) {
+		stm32_hash_write(hdev, HASH_CR, HASH_CR_INIT);
+		stm32_hash_write(hdev, HASH_STR, 0);
+		stm32_hash_write(hdev, HASH_DIN, 0);
+		stm32_hash_write(hdev, HASH_IMR, 0);
+		hdev->err = 0;
+	}
+
+	return 0;
+}
+
+static int stm32_hash_handle_queue(struct stm32_hash_dev *hdev,
+				   struct ahash_request *req)
+{
+	return crypto_transfer_hash_request_to_engine(hdev->engine, req);
+}
+
+static int stm32_hash_prepare_req(struct crypto_engine *engine,
+				  struct ahash_request *req)
+{
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	struct stm32_hash_request_ctx *rctx;
+
+	if (!hdev)
+		return -ENODEV;
+
+	hdev->req = req;
+
+	rctx = ahash_request_ctx(req);
+
+	dev_dbg(hdev->dev, "processing new req, op: %lu, nbytes %d\n",
+		rctx->op, req->nbytes);
+
+	return stm32_hash_hw_init(hdev, rctx);
+}
+
+static int stm32_hash_one_request(struct crypto_engine *engine,
+				  struct ahash_request *req)
+{
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	struct stm32_hash_request_ctx *rctx;
+	int err = 0;
+
+	if (!hdev)
+		return -ENODEV;
+
+	hdev->req = req;
+
+	rctx = ahash_request_ctx(req);
+
+	if (rctx->op == HASH_OP_UPDATE)
+		err = stm32_hash_update_req(hdev);
+	else if (rctx->op == HASH_OP_FINAL)
+		err = stm32_hash_final_req(hdev);
+
+	if (err != -EINPROGRESS)
+	/* done task will not finish it, so do it here */
+		stm32_hash_finish_req(req, err);
+
+	return 0;
+}
+
+static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct stm32_hash_dev *hdev = ctx->hdev;
+
+	rctx->op = op;
+
+	return stm32_hash_handle_queue(hdev, req);
+}
+
+static int stm32_hash_update(struct ahash_request *req)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	int ret;
+
+	if (!req->nbytes || !(rctx->flags & HASH_FLAGS_CPU))
+		return 0;
+
+	rctx->total = req->nbytes;
+	rctx->sg = req->src;
+	rctx->offset = 0;
+
+	if ((rctx->bufcnt + rctx->total < rctx->buflen)) {
+		stm32_hash_append_sg(rctx);
+		return 0;
+	}
+
+	ret = stm32_hash_enqueue(req, HASH_OP_UPDATE);
+
+	if (rctx->flags & HASH_FLAGS_FINUP)
+		return ret;
+
+	return 0;
+}
+
+static int stm32_hash_final(struct ahash_request *req)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+
+	rctx->flags |= HASH_FLAGS_FINUP;
+
+	return stm32_hash_enqueue(req, HASH_OP_FINAL);
+}
+
+static int stm32_hash_finup(struct ahash_request *req)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	int err1, err2;
+
+	rctx->flags |= HASH_FLAGS_FINUP;
+
+	if (hdev->dma_lch && stm32_hash_dma_aligned_data(req))
+		rctx->flags &= ~HASH_FLAGS_CPU;
+
+	err1 = stm32_hash_update(req);
+
+	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+		return err1;
+
+	/*
+	 * final() has to be always called to cleanup resources
+	 * even if update() failed, except EINPROGRESS
+	 */
+	err2 = stm32_hash_final(req);
+
+	return err1 ?: err2;
+}
+
+static int stm32_hash_digest(struct ahash_request *req)
+{
+	return stm32_hash_init(req) ?: stm32_hash_finup(req);
+}
+
+static int stm32_hash_export(struct ahash_request *req, void *out)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	u32 *preg;
+	unsigned int i;
+
+	while (!(stm32_hash_read(hdev, HASH_SR) & HASH_SR_DATA_INPUT_READY))
+		cpu_relax();
+
+	rctx->hw_context = kmalloc(sizeof(u32) * (3 + HASH_CSR_REGISTER_NUMBER),
+				   GFP_KERNEL);
+
+	preg = rctx->hw_context;
+
+	*preg++ = stm32_hash_read(hdev, HASH_IMR);
+	*preg++ = stm32_hash_read(hdev, HASH_STR);
+	*preg++ = stm32_hash_read(hdev, HASH_CR);
+	for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++)
+		*preg++ = stm32_hash_read(hdev, HASH_CSR(i));
+
+	memcpy(out, rctx, sizeof(*rctx));
+
+	return 0;
+}
+
+static int stm32_hash_import(struct ahash_request *req, const void *in)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	const u32 *preg = in;
+	u32 reg;
+	unsigned int i;
+
+	memcpy(rctx, in, sizeof(*rctx));
+
+	preg = rctx->hw_context;
+
+	stm32_hash_write(hdev, HASH_IMR, *preg++);
+	stm32_hash_write(hdev, HASH_STR, *preg++);
+	stm32_hash_write(hdev, HASH_CR, *preg);
+	reg = *preg++ | HASH_CR_INIT;
+	stm32_hash_write(hdev, HASH_CR, reg);
+
+	for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++)
+		stm32_hash_write(hdev, HASH_CSR(i), *preg++);
+
+	kfree(rctx->hw_context);
+
+	return 0;
+}
+
+static int stm32_hash_setkey(struct crypto_ahash *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	if (keylen <= HASH_MAX_KEY_SIZE) {
+		memcpy(ctx->key, key, keylen);
+		ctx->keylen = keylen;
+	} else {
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm,
+				    const char *algs_hmac_name)
+{
+	struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct stm32_hash_request_ctx));
+
+	ctx->keylen = 0;
+
+	if (algs_hmac_name)
+		ctx->flags |= HASH_FLAGS_HMAC;
+
+	return 0;
+}
+
+static int stm32_hash_cra_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, NULL);
+}
+
+static int stm32_hash_cra_md5_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "md5");
+}
+
+static int stm32_hash_cra_sha1_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "sha1");
+}
+
+static int stm32_hash_cra_sha224_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "sha224");
+}
+
+static int stm32_hash_cra_sha256_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "sha256");
+}
+
+static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id)
+{
+	struct stm32_hash_dev *hdev = dev_id;
+	int err;
+
+	if (HASH_FLAGS_CPU & hdev->flags) {
+		if (HASH_FLAGS_OUTPUT_READY & hdev->flags) {
+			hdev->flags &= ~HASH_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (HASH_FLAGS_DMA_READY & hdev->flags) {
+		if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) {
+			hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE;
+				goto finish;
+		}
+	}
+
+	return IRQ_HANDLED;
+
+finish:
+	/*Finish current request */
+	stm32_hash_finish_req(hdev->req, err);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stm32_hash_irq_handler(int irq, void *dev_id)
+{
+	struct stm32_hash_dev *hdev = dev_id;
+	u32 reg;
+
+	reg = stm32_hash_read(hdev, HASH_SR);
+	if (reg & HASH_SR_OUTPUT_READY) {
+		reg &= ~HASH_SR_OUTPUT_READY;
+		stm32_hash_write(hdev, HASH_SR, reg);
+		hdev->flags |= HASH_FLAGS_OUTPUT_READY;
+		return IRQ_WAKE_THREAD;
+	}
+
+	return IRQ_NONE;
+}
+
+static struct ahash_alg algs_md5_sha1[] = {
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = MD5_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "md5",
+				.cra_driver_name = "stm32-md5",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = MD5_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "hmac(md5)",
+				.cra_driver_name = "stm32-hmac-md5",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_md5_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "sha1",
+				.cra_driver_name = "stm32-sha1",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "hmac(sha1)",
+				.cra_driver_name = "stm32-hmac-sha1",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha1_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+};
+
+static struct ahash_alg algs_sha224_sha256[] = {
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA224_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "sha224",
+				.cra_driver_name = "stm32-sha224",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.setkey = stm32_hash_setkey,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA224_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "hmac(sha224)",
+				.cra_driver_name = "stm32-hmac-sha224",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha224_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA256_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "sha256",
+				.cra_driver_name = "stm32-sha256",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA256_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_request_ctx),
+			.base = {
+				.cra_name = "hmac(sha256)",
+				.cra_driver_name = "stm32-hmac-sha256",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_TYPE_AHASH |
+					CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha256_init,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+};
+
+static int stm32_hash_register_algs(struct stm32_hash_dev *hdev)
+{
+	unsigned int i, j;
+	int err;
+
+	for (i = 0; i < hdev->pdata->algs_info_size; i++) {
+		for (j = 0; j < hdev->pdata->algs_info[i].size; j++) {
+			err = crypto_register_ahash(
+				&hdev->pdata->algs_info[i].algs_list[j]);
+			if (err)
+				goto err_algs;
+		}
+	}
+
+	return 0;
+err_algs:
+	dev_err(hdev->dev, "Algo %d : %d failed\n", i, j);
+	for (; i--; ) {
+		for (; j--;)
+			crypto_unregister_ahash(
+				&hdev->pdata->algs_info[i].algs_list[j]);
+	}
+
+	return err;
+}
+
+static int stm32_hash_unregister_algs(struct stm32_hash_dev *hdev)
+{
+	unsigned int i, j;
+
+	for (i = 0; i < hdev->pdata->algs_info_size; i++) {
+		for (j = 0; j < hdev->pdata->algs_info[i].size; j++)
+			crypto_unregister_ahash(
+				&hdev->pdata->algs_info[i].algs_list[j]);
+	}
+
+	return 0;
+}
+
+static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f4[] = {
+	{
+		.algs_list	= algs_md5_sha1,
+		.size		= ARRAY_SIZE(algs_md5_sha1),
+	},
+};
+
+static const struct stm32_hash_pdata stm32_hash_pdata_stm32f4 = {
+	.algs_info	= stm32_hash_algs_info_stm32f4,
+	.algs_info_size	= ARRAY_SIZE(stm32_hash_algs_info_stm32f4),
+};
+
+static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f7[] = {
+	{
+		.algs_list	= algs_md5_sha1,
+		.size		= ARRAY_SIZE(algs_md5_sha1),
+	},
+	{
+		.algs_list	= algs_sha224_sha256,
+		.size		= ARRAY_SIZE(algs_sha224_sha256),
+	},
+};
+
+static const struct stm32_hash_pdata stm32_hash_pdata_stm32f7 = {
+	.algs_info	= stm32_hash_algs_info_stm32f7,
+	.algs_info_size	= ARRAY_SIZE(stm32_hash_algs_info_stm32f7),
+};
+
+static const struct of_device_id stm32_hash_of_match[] = {
+	{
+		.compatible = "st,stm32f456-hash",
+		.data = &stm32_hash_pdata_stm32f4,
+	},
+	{
+		.compatible = "st,stm32f756-hash",
+		.data = &stm32_hash_pdata_stm32f7,
+	},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, stm32_hash_of_match);
+
+static int stm32_hash_get_of_match(struct stm32_hash_dev *hdev,
+				   struct device *dev)
+{
+	const struct of_device_id *match;
+	int err;
+
+	match = of_match_device(stm32_hash_of_match, dev);
+	if (!match) {
+		dev_err(dev, "no compatible OF match\n");
+		return -EINVAL;
+	}
+
+	err = of_property_read_u32(dev->of_node, "dma-maxburst",
+				   &hdev->dma_maxburst);
+
+	hdev->pdata = match->data;
+
+	return err;
+}
+
+static int stm32_hash_probe(struct platform_device *pdev)
+{
+	struct stm32_hash_dev *hdev;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int ret, irq;
+
+	hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
+	if (!hdev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hdev->io_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(hdev->io_base))
+		return PTR_ERR(hdev->io_base);
+
+	hdev->phys_base = res->start;
+
+	ret = stm32_hash_get_of_match(hdev, dev);
+	if (ret)
+		return ret;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Cannot get IRQ resource\n");
+		return irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq, stm32_hash_irq_handler,
+					stm32_hash_irq_thread, IRQF_ONESHOT,
+					dev_name(dev), hdev);
+	if (ret) {
+		dev_err(dev, "Cannot grab IRQ\n");
+		return ret;
+	}
+
+	hdev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(hdev->clk)) {
+		dev_err(dev, "failed to get clock for hash (%lu)\n",
+			PTR_ERR(hdev->clk));
+		return PTR_ERR(hdev->clk);
+	}
+
+	ret = clk_prepare_enable(hdev->clk);
+	if (ret) {
+		dev_err(dev, "failed to enable hash clock (%d)\n", ret);
+		return ret;
+	}
+
+	hdev->rst = devm_reset_control_get(&pdev->dev, NULL);
+	if (!IS_ERR(hdev->rst)) {
+		reset_control_assert(hdev->rst);
+		udelay(2);
+		reset_control_deassert(hdev->rst);
+	}
+
+	hdev->dev = dev;
+
+	platform_set_drvdata(pdev, hdev);
+
+	ret = stm32_hash_dma_init(hdev);
+	if (ret)
+		dev_dbg(dev, "DMA mode not available\n");
+
+	spin_lock(&stm32_hash.lock);
+	list_add_tail(&hdev->list, &stm32_hash.dev_list);
+	spin_unlock(&stm32_hash.lock);
+
+	/* Initialize crypto engine */
+	hdev->engine = crypto_engine_alloc_init(dev, 1);
+	if (!hdev->engine) {
+		ret = -ENOMEM;
+		goto err_engine;
+	}
+
+	hdev->engine->prepare_hash_request = stm32_hash_prepare_req;
+	hdev->engine->hash_one_request = stm32_hash_one_request;
+
+	ret = crypto_engine_start(hdev->engine);
+	if (ret)
+		goto err_engine_start;
+
+	hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR);
+
+	/* Register algos */
+	ret = stm32_hash_register_algs(hdev);
+	if (ret)
+		goto err_algs;
+
+	dev_info(dev, "Init HASH done HW ver %x DMA mode %u\n",
+		 stm32_hash_read(hdev, HASH_VER), hdev->dma_mode);
+
+	return 0;
+
+err_algs:
+err_engine_start:
+	crypto_engine_exit(hdev->engine);
+err_engine:
+	spin_lock(&stm32_hash.lock);
+	list_del(&hdev->list);
+	spin_unlock(&stm32_hash.lock);
+
+	if (hdev->dma_lch)
+		dma_release_channel(hdev->dma_lch);
+
+	clk_disable_unprepare(hdev->clk);
+
+	return ret;
+}
+
+static int stm32_hash_remove(struct platform_device *pdev)
+{
+	static struct stm32_hash_dev *hdev;
+
+	hdev = platform_get_drvdata(pdev);
+	if (!hdev)
+		return -ENODEV;
+
+	stm32_hash_unregister_algs(hdev);
+
+	crypto_engine_exit(hdev->engine);
+
+	spin_lock(&stm32_hash.lock);
+	list_del(&hdev->list);
+	spin_unlock(&stm32_hash.lock);
+
+	if (hdev->dma_lch)
+		dma_release_channel(hdev->dma_lch);
+
+	clk_disable_unprepare(hdev->clk);
+
+	return 0;
+}
+
+static struct platform_driver stm32_hash_driver = {
+	.probe		= stm32_hash_probe,
+	.remove		= stm32_hash_remove,
+	.driver		= {
+		.name	= "stm32-hash",
+		.of_match_table	= stm32_hash_of_match,
+	}
+};
+
+module_platform_driver(stm32_hash_driver);
+
+MODULE_DESCRIPTION("STM32 SHA1/224/256 & MD5 (HMAC) hw accelerator driver");
+MODULE_AUTHOR("Lionel Debieve <lionel.debieve@st.com>");
+MODULE_LICENSE("GPL v2");

From c2176f0098d994050c88d34d37c551d033f2f18f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 16 Jul 2017 19:22:06 +0200
Subject: [PATCH 047/116] crypto: rng - ensure that the RNG is ready before
 using

Otherwise, we might be seeding the RNG using bad randomness, which is
dangerous. The one use of this function from within the kernel -- not
from userspace -- is being removed (keys/big_key), so that call site
isn't relevant in assessing this.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rng.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/crypto/rng.c b/crypto/rng.c
index 5e8469244960f..b4a618668161d 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -43,12 +43,14 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 		if (!buf)
 			return -ENOMEM;
 
-		get_random_bytes(buf, slen);
+		err = get_random_bytes_wait(buf, slen);
+		if (err)
+			goto out;
 		seed = buf;
 	}
 
 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
-
+out:
 	kzfree(buf);
 	return err;
 }

From 68cc652f83b9a4f79471db6c79ae0bfe5175eda3 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Mon, 17 Jul 2017 15:00:49 -0500
Subject: [PATCH 048/116] crypto: ccp - Update copyright dates for 2017.

Some updates this year have not had copyright dates changed in modified
files. Correct this for 2017.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-galois.c | 2 +-
 drivers/crypto/ccp/ccp-crypto-des3.c       | 2 +-
 drivers/crypto/ccp/ccp-crypto-main.c       | 2 +-
 drivers/crypto/ccp/ccp-crypto-sha.c        | 2 +-
 drivers/crypto/ccp/ccp-crypto.h            | 2 +-
 drivers/crypto/ccp/ccp-dev-v3.c            | 2 +-
 drivers/crypto/ccp/ccp-dev-v5.c            | 2 +-
 drivers/crypto/ccp/ccp-dev.c               | 2 +-
 drivers/crypto/ccp/ccp-dev.h               | 2 +-
 drivers/crypto/ccp/ccp-dmaengine.c         | 2 +-
 drivers/crypto/ccp/ccp-ops.c               | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
index 38ee6f348ea9e..52313524a4dd4 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES GCM crypto API support
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
index 5af7347ae03cd..ae87b741f9d57 100644
--- a/drivers/crypto/ccp/ccp-crypto-des3.c
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <ghook@amd.com>
  *
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 8dccbddabef19..78b9d26a381f7 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index ce97b3868f4a6..8b9b16d433f7e 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) SHA crypto API support
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index dd5bf15f06e5c..156b8233853fb 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 695fde887c118..c2861749e2ad3 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index b0391f03b0fe0..b3526336d6089 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index b2d176164402e..52eb3ed25a267 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index b959e2402aa2c..9320931d89dae 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index e00be01fbf5a0..901343dd513eb 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 78f29d459df87..e23d138fc1ceb 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>

From 6ba46c7d4d7e2b4f38c9d9891dee4ba1f5338f31 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Mon, 17 Jul 2017 15:16:13 -0500
Subject: [PATCH 049/116] crypto: ccp - Fix base RSA function for version 5
 CCPs

Version 5 devices have requirements for buffer lengths, as well as
parameter format (e.g. bits vs. bytes). Fix the base CCP driver
code to meet requirements all supported versions.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev-v5.c | 10 ++---
 drivers/crypto/ccp/ccp-ops.c    | 78 +++++++++++++++++++++------------
 2 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index b3526336d6089..5f9e82beadfd2 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -469,7 +469,7 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	CCP5_CMD_PROT(&desc) = 0;
 
 	function.raw = 0;
-	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
+	CCP_RSA_SIZE(&function) = (op->u.rsa.mod_size + 7) >> 3;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
 
 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
@@ -484,10 +484,10 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
-	/* Exponent is in LSB memory */
-	CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
-	CCP5_CMD_KEY_HI(&desc) = 0;
-	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	/* Key (Exponent) is in external memory */
+	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
+	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
 	return ccp5_do_cmd(&desc, op->cmd_q);
 }
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index e23d138fc1ceb..1b757531f79a0 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1731,8 +1731,7 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
-	struct ccp_dm_workarea exp, src;
-	struct ccp_data dst;
+	struct ccp_dm_workarea exp, src, dst;
 	struct ccp_op op;
 	unsigned int sb_count, i_len, o_len;
 	int ret;
@@ -1743,30 +1742,40 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
 		return -EINVAL;
 
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
 	/* The RSA modulus must precede the message being acted upon, so
 	 * it must be copied to a DMA area where the message and the
 	 * modulus can be concatenated.  Therefore the input buffer
 	 * length required is twice the output buffer length (which
-	 * must be a multiple of 256-bits).
+	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
+	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
+	 * required.
 	 */
-	o_len = ((rsa->key_size + 255) / 256) * 32;
+	o_len = 32 * ((rsa->key_size + 255) / 256);
 	i_len = o_len * 2;
 
-	sb_count = o_len / CCP_SB_BYTES;
-
-	memset(&op, 0, sizeof(op));
-	op.cmd_q = cmd_q;
-	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
-	op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
-
-	if (!op.sb_key)
-		return -EIO;
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* sb_count is the number of storage block slots required
+		 * for the modulus.
+		 */
+		sb_count = o_len / CCP_SB_BYTES;
+		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
+								sb_count);
+		if (!op.sb_key)
+			return -EIO;
+	} else {
+		/* A version 5 device allows a modulus size that will not fit
+		 * in the LSB, so the command will transfer it from memory.
+		 * Set the sb key to the default, even though it's not used.
+		 */
+		op.sb_key = cmd_q->sb_key;
+	}
 
-	/* The RSA exponent may span multiple (32-byte) SB entries and must
-	 * be in little endian format. Reverse copy each 32-byte chunk
-	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
-	 * and each byte within that chunk and do not perform any byte swap
-	 * operations on the passthru operation.
+	/* The RSA exponent must be in little endian format. Reverse its
+	 * byte order.
 	 */
 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
 	if (ret)
@@ -1775,11 +1784,22 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
 	if (ret)
 		goto e_exp;
-	ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
-			     CCP_PASSTHRU_BYTESWAP_NOOP);
-	if (ret) {
-		cmd->engine_error = cmd_q->cmd_error;
-		goto e_exp;
+
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* Copy the exponent to the local storage block, using
+		 * as many 32-byte blocks as were allocated above. It's
+		 * already little endian, so no further change is required.
+		 */
+		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_exp;
+		}
+	} else {
+		/* The exponent can be retrieved from memory via DMA. */
+		op.exp.u.dma.address = exp.dma.address;
+		op.exp.u.dma.offset = 0;
 	}
 
 	/* Concatenate the modulus and the message. Both the modulus and
@@ -1798,8 +1818,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_src;
 
 	/* Prepare the output area for the operation */
-	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
-			    o_len, DMA_FROM_DEVICE);
+	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
 	if (ret)
 		goto e_src;
 
@@ -1807,7 +1826,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	op.src.u.dma.address = src.dma.address;
 	op.src.u.dma.offset = 0;
 	op.src.u.dma.length = i_len;
-	op.dst.u.dma.address = dst.dm_wa.dma.address;
+	op.dst.u.dma.address = dst.dma.address;
 	op.dst.u.dma.offset = 0;
 	op.dst.u.dma.length = o_len;
 
@@ -1820,10 +1839,10 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_dst;
 	}
 
-	ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len);
+	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
 
 e_dst:
-	ccp_free_data(&dst, cmd_q);
+	ccp_dm_free(&dst);
 
 e_src:
 	ccp_dm_free(&src);
@@ -1832,7 +1851,8 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	ccp_dm_free(&exp);
 
 e_sb:
-	cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
+		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
 	return ret;
 }

From 333706b8ed81b64b6c4241e493791a81bc8e6d43 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Mon, 17 Jul 2017 15:16:21 -0500
Subject: [PATCH 050/116] crypto: Add akcipher_set_reqsize() function

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/akcipher.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h
index 479a0078f0f70..805686ba2be47 100644
--- a/include/crypto/internal/akcipher.h
+++ b/include/crypto/internal/akcipher.h
@@ -38,6 +38,12 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req)
 	return req->__ctx;
 }
 
+static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher,
+					unsigned int reqsize)
+{
+	crypto_akcipher_alg(akcipher)->reqsize = reqsize;
+}
+
 static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm)
 {
 	return tfm->base.__crt_ctx;

From ceeec0afd684fdfde285469df0ead6c3ab53513e Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Mon, 17 Jul 2017 15:16:32 -0500
Subject: [PATCH 051/116] crypto: ccp - Add support for RSA on the CCP

Wire up the CCP as an RSA cipher provider.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/Makefile          |   1 +
 drivers/crypto/ccp/ccp-crypto-main.c |  19 ++
 drivers/crypto/ccp/ccp-crypto-rsa.c  | 296 +++++++++++++++++++++++++++
 drivers/crypto/ccp/ccp-crypto.h      |  31 +++
 4 files changed, 347 insertions(+)
 create mode 100644 drivers/crypto/ccp/ccp-crypto-rsa.c

diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 5f2adc5facfe8..57f8debfcfb38 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -15,4 +15,5 @@ ccp-crypto-objs := ccp-crypto-main.o \
 		   ccp-crypto-aes-xts.o \
 		   ccp-crypto-aes-galois.o \
 		   ccp-crypto-des3.o \
+		   ccp-crypto-rsa.o \
 		   ccp-crypto-sha.o
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 78b9d26a381f7..35a9de7fd475a 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -17,6 +17,7 @@
 #include <linux/ccp.h>
 #include <linux/scatterlist.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/akcipher.h>
 
 #include "ccp-crypto.h"
 
@@ -37,10 +38,15 @@ static unsigned int des3_disable;
 module_param(des3_disable, uint, 0444);
 MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value");
 
+static unsigned int rsa_disable;
+module_param(rsa_disable, uint, 0444);
+MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value");
+
 /* List heads for the supported algorithms */
 static LIST_HEAD(hash_algs);
 static LIST_HEAD(cipher_algs);
 static LIST_HEAD(aead_algs);
+static LIST_HEAD(akcipher_algs);
 
 /* For any tfm, requests for that tfm must be returned on the order
  * received.  With multiple queues available, the CCP can process more
@@ -358,6 +364,12 @@ static int ccp_register_algs(void)
 			return ret;
 	}
 
+	if (!rsa_disable) {
+		ret = ccp_register_rsa_algs(&akcipher_algs);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -366,6 +378,7 @@ static void ccp_unregister_algs(void)
 	struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
 	struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
 	struct ccp_crypto_aead *aead_alg, *aead_tmp;
+	struct ccp_crypto_akcipher_alg *akc_alg, *akc_tmp;
 
 	list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) {
 		crypto_unregister_ahash(&ahash_alg->alg);
@@ -384,6 +397,12 @@ static void ccp_unregister_algs(void)
 		list_del(&aead_alg->entry);
 		kfree(aead_alg);
 	}
+
+	list_for_each_entry_safe(akc_alg, akc_tmp, &akcipher_algs, entry) {
+		crypto_unregister_akcipher(&akc_alg->alg);
+		list_del(&akc_alg->entry);
+		kfree(akc_alg);
+	}
 }
 
 static int ccp_crypto_init(void)
diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c
new file mode 100644
index 0000000000000..d5544943f5f0c
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-rsa.c
@@ -0,0 +1,296 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) RSA crypto API support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static inline struct akcipher_request *akcipher_request_cast(
+	struct crypto_async_request *req)
+{
+	return container_of(req, struct akcipher_request, base);
+}
+
+static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen,
+					    const u8 *buf, size_t sz)
+{
+	int nskip;
+
+	for (nskip = 0; nskip < sz; nskip++)
+		if (buf[nskip])
+			break;
+	*kplen = sz - nskip;
+	*kpbuf = kzalloc(*kplen, GFP_KERNEL);
+	if (!*kpbuf)
+		return -ENOMEM;
+	memcpy(*kpbuf, buf + nskip, *kplen);
+
+	return 0;
+}
+
+static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct akcipher_request *req = akcipher_request_cast(async_req);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+
+	if (ret)
+		return ret;
+
+	req->dst_len = rctx->cmd.u.rsa.key_size >> 3;
+
+	return 0;
+}
+
+static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
+{
+	return CCP_RSA_MAXMOD;
+}
+
+static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+	int ret = 0;
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_RSA;
+
+	rctx->cmd.u.rsa.key_size = ctx->u.rsa.key_len; /* in bits */
+	if (encrypt) {
+		rctx->cmd.u.rsa.exp = &ctx->u.rsa.e_sg;
+		rctx->cmd.u.rsa.exp_len = ctx->u.rsa.e_len;
+	} else {
+		rctx->cmd.u.rsa.exp = &ctx->u.rsa.d_sg;
+		rctx->cmd.u.rsa.exp_len = ctx->u.rsa.d_len;
+	}
+	rctx->cmd.u.rsa.mod = &ctx->u.rsa.n_sg;
+	rctx->cmd.u.rsa.mod_len = ctx->u.rsa.n_len;
+	rctx->cmd.u.rsa.src = req->src;
+	rctx->cmd.u.rsa.src_len = req->src_len;
+	rctx->cmd.u.rsa.dst = req->dst;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_rsa_encrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, true);
+}
+
+static int ccp_rsa_decrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, false);
+}
+
+static int ccp_check_key_length(unsigned int len)
+{
+	/* In bits */
+	if (len < 8 || len > 4096)
+		return -EINVAL;
+	return 0;
+}
+
+static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx)
+{
+	/* Clean up old key data */
+	kzfree(ctx->u.rsa.e_buf);
+	ctx->u.rsa.e_buf = NULL;
+	ctx->u.rsa.e_len = 0;
+	kzfree(ctx->u.rsa.n_buf);
+	ctx->u.rsa.n_buf = NULL;
+	ctx->u.rsa.n_len = 0;
+	kzfree(ctx->u.rsa.d_buf);
+	ctx->u.rsa.d_buf = NULL;
+	ctx->u.rsa.d_len = 0;
+}
+
+static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
+			  unsigned int keylen, bool private)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct rsa_key raw_key;
+	int ret;
+
+	ccp_rsa_free_key_bufs(ctx);
+	memset(&raw_key, 0, sizeof(raw_key));
+
+	/* Code borrowed from crypto/rsa.c */
+	if (private)
+		ret = rsa_parse_priv_key(&raw_key, key, keylen);
+	else
+		ret = rsa_parse_pub_key(&raw_key, key, keylen);
+	if (ret)
+		goto n_key;
+
+	ret = ccp_copy_and_save_keypart(&ctx->u.rsa.n_buf, &ctx->u.rsa.n_len,
+					raw_key.n, raw_key.n_sz);
+	if (ret)
+		goto key_err;
+	sg_init_one(&ctx->u.rsa.n_sg, ctx->u.rsa.n_buf, ctx->u.rsa.n_len);
+
+	ctx->u.rsa.key_len = ctx->u.rsa.n_len << 3; /* convert to bits */
+	if (ccp_check_key_length(ctx->u.rsa.key_len)) {
+		ret = -EINVAL;
+		goto key_err;
+	}
+
+	ret = ccp_copy_and_save_keypart(&ctx->u.rsa.e_buf, &ctx->u.rsa.e_len,
+					raw_key.e, raw_key.e_sz);
+	if (ret)
+		goto key_err;
+	sg_init_one(&ctx->u.rsa.e_sg, ctx->u.rsa.e_buf, ctx->u.rsa.e_len);
+
+	if (private) {
+		ret = ccp_copy_and_save_keypart(&ctx->u.rsa.d_buf,
+						&ctx->u.rsa.d_len,
+						raw_key.d, raw_key.d_sz);
+		if (ret)
+			goto key_err;
+		sg_init_one(&ctx->u.rsa.d_sg,
+			    ctx->u.rsa.d_buf, ctx->u.rsa.d_len);
+	}
+
+	return 0;
+
+key_err:
+	ccp_rsa_free_key_bufs(ctx);
+
+n_key:
+	return ret;
+}
+
+static int ccp_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
+			      unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, true);
+}
+
+static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
+			     unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, false);
+}
+
+static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx));
+	ctx->complete = ccp_rsa_complete;
+
+	return 0;
+}
+
+static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+	ccp_rsa_free_key_bufs(ctx);
+}
+
+static struct akcipher_alg ccp_rsa_defaults = {
+	.encrypt = ccp_rsa_encrypt,
+	.decrypt = ccp_rsa_decrypt,
+	.sign = ccp_rsa_decrypt,
+	.verify = ccp_rsa_encrypt,
+	.set_pub_key = ccp_rsa_setpubkey,
+	.set_priv_key = ccp_rsa_setprivkey,
+	.max_size = ccp_rsa_maxsize,
+	.init = ccp_rsa_init_tfm,
+	.exit = ccp_rsa_exit_tfm,
+	.base = {
+		.cra_name = "rsa",
+		.cra_driver_name = "rsa-ccp",
+		.cra_priority = CCP_CRA_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = 2 * sizeof(struct ccp_ctx),
+	},
+};
+
+struct ccp_rsa_def {
+	unsigned int version;
+	const char *name;
+	const char *driver_name;
+	unsigned int reqsize;
+	struct akcipher_alg *alg_defaults;
+};
+
+static struct ccp_rsa_def rsa_algs[] = {
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "rsa",
+		.driver_name	= "rsa-ccp",
+		.reqsize	= sizeof(struct ccp_rsa_req_ctx),
+		.alg_defaults	= &ccp_rsa_defaults,
+	}
+};
+
+int ccp_register_rsa_alg(struct list_head *head, const struct ccp_rsa_def *def)
+{
+	struct ccp_crypto_akcipher_alg *ccp_alg;
+	struct akcipher_alg *alg;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	alg = &ccp_alg->alg;
+	*alg = *def->alg_defaults;
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+	ret = crypto_register_akcipher(alg);
+	if (ret) {
+		pr_err("%s akcipher algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return 0;
+}
+
+int ccp_register_rsa_algs(struct list_head *head)
+{
+	int i, ret;
+	unsigned int ccpversion = ccp_version();
+
+	/* Register the RSA algorithm in standard mode
+	 * This works for CCP v3 and later
+	 */
+	for (i = 0; i < ARRAY_SIZE(rsa_algs); i++) {
+		if (rsa_algs[i].version > ccpversion)
+			continue;
+		ret = ccp_register_rsa_alg(head, &rsa_algs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 156b8233853fb..aa53b97f6f006 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -24,6 +24,8 @@
 #include <crypto/ctr.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
+#include <crypto/akcipher.h>
+#include <crypto/internal/rsa.h>
 
 #define	CCP_LOG_LEVEL	KERN_INFO
 
@@ -58,6 +60,12 @@ struct ccp_crypto_ahash_alg {
 	struct ahash_alg alg;
 };
 
+struct ccp_crypto_akcipher_alg {
+	struct list_head entry;
+
+	struct akcipher_alg alg;
+};
+
 static inline struct ccp_crypto_ablkcipher_alg *
 	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
 {
@@ -227,12 +235,34 @@ struct ccp_sha_exp_ctx {
 	u8 buf[MAX_SHA_BLOCK_SIZE];
 };
 
+/***** RSA related defines *****/
+
+struct ccp_rsa_ctx {
+	unsigned int key_len; /* in bits */
+	struct scatterlist e_sg;
+	u8 *e_buf;
+	unsigned int e_len;
+	struct scatterlist n_sg;
+	u8 *n_buf;
+	unsigned int n_len;
+	struct scatterlist d_sg;
+	u8 *d_buf;
+	unsigned int d_len;
+};
+
+struct ccp_rsa_req_ctx {
+	struct ccp_cmd cmd;
+};
+
+#define	CCP_RSA_MAXMOD	(4 * 1024 / 8)
+
 /***** Common Context Structure *****/
 struct ccp_ctx {
 	int (*complete)(struct crypto_async_request *req, int ret);
 
 	union {
 		struct ccp_aes_ctx aes;
+		struct ccp_rsa_ctx rsa;
 		struct ccp_sha_ctx sha;
 		struct ccp_des3_ctx des3;
 	} u;
@@ -249,5 +279,6 @@ int ccp_register_aes_xts_algs(struct list_head *head);
 int ccp_register_aes_aeads(struct list_head *head);
 int ccp_register_sha_algs(struct list_head *head);
 int ccp_register_des3_algs(struct list_head *head);
+int ccp_register_rsa_algs(struct list_head *head);
 
 #endif

From e28c190db66830c04b403b7eba7f8a5b53c22ffc Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Mon, 17 Jul 2017 15:16:42 -0500
Subject: [PATCH 052/116] csrypto: ccp - Expand RSA support for a v5 ccp

A version 5 CCP can handle an RSA modulus up to 16k bits.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-rsa.c | 5 ++++-
 drivers/crypto/ccp/ccp-crypto.h     | 1 +
 drivers/crypto/ccp/ccp-dev-v3.c     | 1 +
 drivers/crypto/ccp/ccp-dev-v5.c     | 2 ++
 drivers/crypto/ccp/ccp-dev.h        | 1 +
 drivers/crypto/ccp/ccp-ops.c        | 3 ++-
 drivers/crypto/ccp/sp-dev.h         | 1 +
 7 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c
index d5544943f5f0c..e6db8672d89c4 100644
--- a/drivers/crypto/ccp/ccp-crypto-rsa.c
+++ b/drivers/crypto/ccp/ccp-crypto-rsa.c
@@ -60,7 +60,10 @@ static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
 
 static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
 {
-	return CCP_RSA_MAXMOD;
+	if (ccp_version() > CCP_VERSION(3, 0))
+		return CCP5_RSA_MAXMOD;
+	else
+		return CCP_RSA_MAXMOD;
 }
 
 static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index aa53b97f6f006..67c7620029e38 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -255,6 +255,7 @@ struct ccp_rsa_req_ctx {
 };
 
 #define	CCP_RSA_MAXMOD	(4 * 1024 / 8)
+#define	CCP5_RSA_MAXMOD	(16 * 1024 / 8)
 
 /***** Common Context Structure *****/
 struct ccp_ctx {
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index c2861749e2ad3..240bebbcb8ac7 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -597,4 +597,5 @@ const struct ccp_vdata ccpv3 = {
 	.setup = NULL,
 	.perform = &ccp3_actions,
 	.offset = 0x20000,
+	.rsamax = CCP_RSA_MAX_WIDTH,
 };
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 5f9e82beadfd2..6ace6dd5a239c 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -1112,6 +1112,7 @@ const struct ccp_vdata ccpv5a = {
 	.setup = ccp5_config,
 	.perform = &ccp5_actions,
 	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
 };
 
 const struct ccp_vdata ccpv5b = {
@@ -1120,4 +1121,5 @@ const struct ccp_vdata ccpv5b = {
 	.setup = ccp5other_config,
 	.perform = &ccp5_actions,
 	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
 };
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 9320931d89dae..1f6deee117a73 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -202,6 +202,7 @@
 #define CCP_SHA_SB_COUNT		1
 
 #define CCP_RSA_MAX_WIDTH		4096
+#define CCP5_RSA_MAX_WIDTH		16384
 
 #define CCP_PASSTHRU_BLOCKSIZE		256
 #define CCP_PASSTHRU_MASKSIZE		32
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 1b757531f79a0..40c062ad87263 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1736,7 +1736,8 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	unsigned int sb_count, i_len, o_len;
 	int ret;
 
-	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
+	/* Check against the maximum allowable size, in bits */
+	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
 		return -EINVAL;
 
 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 3520da4e20cfa..5ab486ade1ad9 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -40,6 +40,7 @@ struct ccp_vdata {
 	void (*setup)(struct ccp_device *);
 	const struct ccp_actions *perform;
 	const unsigned int offset;
+	const unsigned int rsamax;
 };
 /* Structure to hold SP device data */
 struct sp_dev_vdata {

From 297b9cebd2fc020f0bd3e0aac68b0758ab84e8d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Tue, 18 Jul 2017 18:30:47 +0300
Subject: [PATCH 053/116] crypto: caam/jr - add support for DPAA2 parts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for using the caam/jr backend on DPAA2-based SoCs.
These have some particularities we have to account for:
-HW S/G format is different
-Management Complex (MC) firmware initializes / manages (partially)
the CAAM block: MCFGR, QI enablement in QICTL, RNG

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamhash.c   |  7 ++-
 drivers/crypto/caam/ctrl.c       | 45 +++++++++++-------
 drivers/crypto/caam/ctrl.h       |  2 +
 drivers/crypto/caam/jr.c         |  7 ++-
 drivers/crypto/caam/regs.h       |  1 +
 drivers/crypto/caam/sg_sw_qm2.h  | 81 ++++++++++++++++++++++++++++++++
 drivers/crypto/caam/sg_sw_sec4.h | 30 ++++++++++--
 7 files changed, 148 insertions(+), 25 deletions(-)
 create mode 100644 drivers/crypto/caam/sg_sw_qm2.h

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 910ec61cae098..698580b60b2f1 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -791,8 +791,8 @@ static int ahash_update_ctx(struct ahash_request *req)
 							 to_hash - *buflen,
 							 *next_buflen, 0);
 		} else {
-			(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
-				cpu_to_caam32(SEC4_SG_LEN_FIN);
+			sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
+					    1);
 		}
 
 		desc = edesc->hw_desc;
@@ -882,8 +882,7 @@ static int ahash_final_ctx(struct ahash_request *req)
 	if (ret)
 		goto unmap_ctx;
 
-	(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
-		cpu_to_caam32(SEC4_SG_LEN_FIN);
+	sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1);
 
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 7338f15b86741..fdbcba13824c2 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -17,6 +17,8 @@
 
 bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
+bool caam_dpaa2;
+EXPORT_SYMBOL(caam_dpaa2);
 
 #ifdef CONFIG_CAAM_QI
 #include "qi.h"
@@ -319,8 +321,11 @@ static int caam_remove(struct platform_device *pdev)
 		caam_qi_shutdown(ctrlpriv->qidev);
 #endif
 
-	/* De-initialize RNG state handles initialized by this driver. */
-	if (ctrlpriv->rng4_sh_init)
+	/*
+	 * De-initialize RNG state handles initialized by this driver.
+	 * In case of DPAA 2.x, RNG is managed by MC firmware.
+	 */
+	if (!caam_dpaa2 && ctrlpriv->rng4_sh_init)
 		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
 
 	/* Shut down debug views */
@@ -552,12 +557,17 @@ static int caam_probe(struct platform_device *pdev)
 
 	/*
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
-	 * long pointers in master configuration register
+	 * long pointers in master configuration register.
+	 * In case of DPAA 2.x, Management Complex firmware performs
+	 * the configuration.
 	 */
-	clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
-		      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
-		      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
-		      (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
+	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
+	if (!caam_dpaa2)
+		clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
+			      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
+			      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
+			      (sizeof(dma_addr_t) == sizeof(u64) ?
+			       MCFGR_LONG_PTR : 0));
 
 	/*
 	 *  Read the Compile Time paramters and SCFGR to determine
@@ -586,7 +596,9 @@ static int caam_probe(struct platform_device *pdev)
 			      JRSTART_JR3_START);
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
+		if (caam_dpaa2)
+			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(49));
+		else if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
 		else
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
@@ -629,11 +641,9 @@ static int caam_probe(struct platform_device *pdev)
 			ring++;
 		}
 
-	/* Check to see if QI present. If so, enable */
-	ctrlpriv->qi_present =
-			!!(rd_reg32(&ctrl->perfmon.comp_parms_ms) &
-			   CTPR_MS_QI_MASK);
-	if (ctrlpriv->qi_present) {
+	/* Check to see if (DPAA 1.x) QI present. If so, enable */
+	ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
+	if (ctrlpriv->qi_present && !caam_dpaa2) {
 		ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
 			       ((__force uint8_t *)ctrl +
 				 BLOCK_OFFSET * QI_BLOCK_NUMBER
@@ -661,8 +671,10 @@ static int caam_probe(struct platform_device *pdev)
 	/*
 	 * If SEC has RNG version >= 4 and RNG state handle has not been
 	 * already instantiated, do RNG instantiation
+	 * In case of DPAA 2.x, RNG is managed by MC firmware.
 	 */
-	if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
+	if (!caam_dpaa2 &&
+	    (cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
 		ctrlpriv->rng4_sh_init =
 			rd_reg32(&ctrl->r4tst[0].rdsta);
 		/*
@@ -730,8 +742,9 @@ static int caam_probe(struct platform_device *pdev)
 	/* Report "alive" for developer to see */
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
 		 caam_get_era());
-	dev_info(dev, "job rings = %d, qi = %d\n",
-		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
+	dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
+		 ctrlpriv->total_jobrs, ctrlpriv->qi_present,
+		 caam_dpaa2 ? "yes" : "no");
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h
index cac5402a46eba..7e7bf68c9ef5e 100644
--- a/drivers/crypto/caam/ctrl.h
+++ b/drivers/crypto/caam/ctrl.h
@@ -10,4 +10,6 @@
 /* Prototypes for backend-level services exposed to APIs */
 int caam_get_era(void);
 
+extern bool caam_dpaa2;
+
 #endif /* CTRL_H */
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 1ccfb317d468d..d258953ff4883 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -9,6 +9,7 @@
 #include <linux/of_address.h>
 
 #include "compat.h"
+#include "ctrl.h"
 #include "regs.h"
 #include "jr.h"
 #include "desc.h"
@@ -499,7 +500,11 @@ static int caam_jr_probe(struct platform_device *pdev)
 	jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl;
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring"))
+		if (caam_dpaa2)
+			error = dma_set_mask_and_coherent(jrdev,
+							  DMA_BIT_MASK(49));
+		else if (of_device_is_compatible(nprop,
+						 "fsl,sec-v5.0-job-ring"))
 			error = dma_set_mask_and_coherent(jrdev,
 							  DMA_BIT_MASK(40));
 		else
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 84d2f838a063c..2b5efff9ec3ca 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -293,6 +293,7 @@ struct caam_perfmon {
 	u32 cha_rev_ls;		/* CRNR - CHA Rev No. Least significant half*/
 #define CTPR_MS_QI_SHIFT	25
 #define CTPR_MS_QI_MASK		(0x1ull << CTPR_MS_QI_SHIFT)
+#define CTPR_MS_DPAA2		BIT(13)
 #define CTPR_MS_VIRT_EN_INCL	0x00000001
 #define CTPR_MS_VIRT_EN_POR	0x00000002
 #define CTPR_MS_PG_SZ_MASK	0x10
diff --git a/drivers/crypto/caam/sg_sw_qm2.h b/drivers/crypto/caam/sg_sw_qm2.h
new file mode 100644
index 0000000000000..31b4407571460
--- /dev/null
+++ b/drivers/crypto/caam/sg_sw_qm2.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the names of the above-listed copyright holders nor the
+ *	 names of any contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SG_SW_QM2_H_
+#define _SG_SW_QM2_H_
+
+#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
+
+static inline void dma_to_qm_sg_one(struct dpaa2_sg_entry *qm_sg_ptr,
+				    dma_addr_t dma, u32 len, u16 offset)
+{
+	dpaa2_sg_set_addr(qm_sg_ptr, dma);
+	dpaa2_sg_set_format(qm_sg_ptr, dpaa2_sg_single);
+	dpaa2_sg_set_final(qm_sg_ptr, false);
+	dpaa2_sg_set_len(qm_sg_ptr, len);
+	dpaa2_sg_set_bpid(qm_sg_ptr, 0);
+	dpaa2_sg_set_offset(qm_sg_ptr, offset);
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * but does not have final bit; instead, returns last entry
+ */
+static inline struct dpaa2_sg_entry *
+sg_to_qm_sg(struct scatterlist *sg, int sg_count,
+	    struct dpaa2_sg_entry *qm_sg_ptr, u16 offset)
+{
+	while (sg_count && sg) {
+		dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg),
+				 sg_dma_len(sg), offset);
+		qm_sg_ptr++;
+		sg = sg_next(sg);
+		sg_count--;
+	}
+	return qm_sg_ptr - 1;
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * scatterlist must have been previously dma mapped
+ */
+static inline void sg_to_qm_sg_last(struct scatterlist *sg, int sg_count,
+				    struct dpaa2_sg_entry *qm_sg_ptr,
+				    u16 offset)
+{
+	qm_sg_ptr = sg_to_qm_sg(sg, sg_count, qm_sg_ptr, offset);
+	dpaa2_sg_set_final(qm_sg_ptr, true);
+}
+
+#endif /* _SG_SW_QM2_H_ */
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 2f6bf162bb6c8..936b1b6300581 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -5,7 +5,13 @@
  *
  */
 
+#ifndef _SG_SW_SEC4_H_
+#define _SG_SW_SEC4_H_
+
+#include "ctrl.h"
 #include "regs.h"
+#include "sg_sw_qm2.h"
+#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
 
 struct sec4_sg_entry {
 	u64 ptr;
@@ -19,9 +25,15 @@ struct sec4_sg_entry {
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
 				      dma_addr_t dma, u32 len, u16 offset)
 {
-	sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
-	sec4_sg_ptr->len = cpu_to_caam32(len);
-	sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
+	if (caam_dpaa2) {
+		dma_to_qm_sg_one((struct dpaa2_sg_entry *)sec4_sg_ptr, dma, len,
+				 offset);
+	} else {
+		sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
+		sec4_sg_ptr->len = cpu_to_caam32(len);
+		sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset &
+							 SEC4_SG_OFFSET_MASK);
+	}
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
@@ -47,6 +59,14 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count,
 	return sec4_sg_ptr - 1;
 }
 
+static inline void sg_to_sec4_set_last(struct sec4_sg_entry *sec4_sg_ptr)
+{
+	if (caam_dpaa2)
+		dpaa2_sg_set_final((struct dpaa2_sg_entry *)sec4_sg_ptr, true);
+	else
+		sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+}
+
 /*
  * convert scatterlist to h/w link table format
  * scatterlist must have been previously dma mapped
@@ -56,5 +76,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
 				      u16 offset)
 {
 	sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
-	sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+	sg_to_sec4_set_last(sec4_sg_ptr);
 }
+
+#endif /* _SG_SW_SEC4_H_ */

From b2e870f22284d2e69602c34027ec7bd5edeae419 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jul 2017 16:42:56 -0500
Subject: [PATCH 054/116] crypto: n2 - Convert to using %pOF instead of
 full_name

Now that we have a custom printf format specifier, convert users of
full_name to use %pOF instead. This is preparation to remove storing
of the full path string for each node.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/n2_core.c | 60 +++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 269451375b637..a9fd8b9e86cde 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1730,8 +1730,8 @@ static int spu_mdesc_walk_arcs(struct mdesc_handle *mdesc,
 			continue;
 		id = mdesc_get_property(mdesc, tgt, "id", NULL);
 		if (table[*id] != NULL) {
-			dev_err(&dev->dev, "%s: SPU cpu slot already set.\n",
-				dev->dev.of_node->full_name);
+			dev_err(&dev->dev, "%pOF: SPU cpu slot already set.\n",
+				dev->dev.of_node);
 			return -EINVAL;
 		}
 		cpumask_set_cpu(*id, &p->sharing);
@@ -1751,8 +1751,8 @@ static int handle_exec_unit(struct spu_mdesc_info *ip, struct list_head *list,
 
 	p = kzalloc(sizeof(struct spu_queue), GFP_KERNEL);
 	if (!p) {
-		dev_err(&dev->dev, "%s: Could not allocate SPU queue.\n",
-			dev->dev.of_node->full_name);
+		dev_err(&dev->dev, "%pOF: Could not allocate SPU queue.\n",
+			dev->dev.of_node);
 		return -ENOMEM;
 	}
 
@@ -1981,41 +1981,39 @@ static void n2_spu_driver_version(void)
 static int n2_crypto_probe(struct platform_device *dev)
 {
 	struct mdesc_handle *mdesc;
-	const char *full_name;
 	struct n2_crypto *np;
 	int err;
 
 	n2_spu_driver_version();
 
-	full_name = dev->dev.of_node->full_name;
-	pr_info("Found N2CP at %s\n", full_name);
+	pr_info("Found N2CP at %pOF\n", dev->dev.of_node);
 
 	np = alloc_n2cp();
 	if (!np) {
-		dev_err(&dev->dev, "%s: Unable to allocate n2cp.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to allocate n2cp.\n",
+			dev->dev.of_node);
 		return -ENOMEM;
 	}
 
 	err = grab_global_resources();
 	if (err) {
-		dev_err(&dev->dev, "%s: Unable to grab "
-			"global resources.\n", full_name);
+		dev_err(&dev->dev, "%pOF: Unable to grab global resources.\n",
+			dev->dev.of_node);
 		goto out_free_n2cp;
 	}
 
 	mdesc = mdesc_grab();
 
 	if (!mdesc) {
-		dev_err(&dev->dev, "%s: Unable to grab MDESC.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to grab MDESC.\n",
+			dev->dev.of_node);
 		err = -ENODEV;
 		goto out_free_global;
 	}
 	err = grab_mdesc_irq_props(mdesc, dev, &np->cwq_info, "n2cp");
 	if (err) {
-		dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to grab IRQ props.\n",
+			dev->dev.of_node);
 		mdesc_release(mdesc);
 		goto out_free_global;
 	}
@@ -2026,15 +2024,15 @@ static int n2_crypto_probe(struct platform_device *dev)
 	mdesc_release(mdesc);
 
 	if (err) {
-		dev_err(&dev->dev, "%s: CWQ MDESC scan failed.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: CWQ MDESC scan failed.\n",
+			dev->dev.of_node);
 		goto out_free_global;
 	}
 
 	err = n2_register_algs();
 	if (err) {
-		dev_err(&dev->dev, "%s: Unable to register algorithms.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to register algorithms.\n",
+			dev->dev.of_node);
 		goto out_free_spu_list;
 	}
 
@@ -2092,42 +2090,40 @@ static void free_ncp(struct n2_mau *mp)
 static int n2_mau_probe(struct platform_device *dev)
 {
 	struct mdesc_handle *mdesc;
-	const char *full_name;
 	struct n2_mau *mp;
 	int err;
 
 	n2_spu_driver_version();
 
-	full_name = dev->dev.of_node->full_name;
-	pr_info("Found NCP at %s\n", full_name);
+	pr_info("Found NCP at %pOF\n", dev->dev.of_node);
 
 	mp = alloc_ncp();
 	if (!mp) {
-		dev_err(&dev->dev, "%s: Unable to allocate ncp.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to allocate ncp.\n",
+			dev->dev.of_node);
 		return -ENOMEM;
 	}
 
 	err = grab_global_resources();
 	if (err) {
-		dev_err(&dev->dev, "%s: Unable to grab "
-			"global resources.\n", full_name);
+		dev_err(&dev->dev, "%pOF: Unable to grab global resources.\n",
+			dev->dev.of_node);
 		goto out_free_ncp;
 	}
 
 	mdesc = mdesc_grab();
 
 	if (!mdesc) {
-		dev_err(&dev->dev, "%s: Unable to grab MDESC.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to grab MDESC.\n",
+			dev->dev.of_node);
 		err = -ENODEV;
 		goto out_free_global;
 	}
 
 	err = grab_mdesc_irq_props(mdesc, dev, &mp->mau_info, "ncp");
 	if (err) {
-		dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: Unable to grab IRQ props.\n",
+			dev->dev.of_node);
 		mdesc_release(mdesc);
 		goto out_free_global;
 	}
@@ -2138,8 +2134,8 @@ static int n2_mau_probe(struct platform_device *dev)
 	mdesc_release(mdesc);
 
 	if (err) {
-		dev_err(&dev->dev, "%s: MAU MDESC scan failed.\n",
-			full_name);
+		dev_err(&dev->dev, "%pOF: MAU MDESC scan failed.\n",
+			dev->dev.of_node);
 		goto out_free_global;
 	}
 

From 0588d8500b6e7a94242c442d24ad9a55d8ad8f34 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 18 Jul 2017 18:03:11 -0500
Subject: [PATCH 055/116] crypto: omap-sham - remove unnecessary static in
 omap_sham_remove()

Remove unnecessary static on local variable dd. Such variable
is initialized before being used, on every execution path throughout
the function. The static has no benefit and, removing it reduces the
object file size.

This issue was detected using Coccinelle and the following semantic patch:
https://github.com/GustavoARSilva/coccinelle/blob/master/static/static_unused.cocci

In the following log you can see a difference in the object file size.
This log is the output of the size command, before and after the code
change:

before:
   text    data     bss     dec     hex filename
  26135   11944     128   38207    953f drivers/crypto/omap-sham.o

after:
   text    data     bss     dec     hex filename
  26084   11856      64   38004    9474 drivers/crypto/omap-sham.o

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-sham.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 9ad9d399daf11..c40ac30ec0026 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -2133,7 +2133,7 @@ static int omap_sham_probe(struct platform_device *pdev)
 
 static int omap_sham_remove(struct platform_device *pdev)
 {
-	static struct omap_sham_dev *dd;
+	struct omap_sham_dev *dd;
 	int i, j;
 
 	dd = platform_get_drvdata(pdev);

From 22d96f04bb7498549ae7c130f03d7ea327744c5a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 18 Jul 2017 18:04:45 -0500
Subject: [PATCH 056/116] crypto: atmel-sha - remove unnecessary static in
 atmel_sha_remove()

Remove unnecessary static on local variable sha_dd. Such variable
is initialized before being used, on every execution path throughout
the function. The static has no benefit and, removing it reduces the
object file size.

This issue was detected using Coccinelle and the following semantic patch:
https://github.com/GustavoARSilva/coccinelle/blob/master/static/static_unused.cocci

In the following log you can see a significant difference in the object
file size. This log is the output of the size command, before and after
the code change:

before:
   text    data     bss     dec     hex filename
  30005   10264     128   40397    9dcd drivers/crypto/atmel-sha.o

after:
   text    data     bss     dec     hex filename
  29934   10208      64   40206    9d0e drivers/crypto/atmel-sha.o

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-sha.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index dad4e5bad827f..3e2f41b3eaf3a 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2883,7 +2883,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
 
 static int atmel_sha_remove(struct platform_device *pdev)
 {
-	static struct atmel_sha_dev *sha_dd;
+	struct atmel_sha_dev *sha_dd;
 
 	sha_dd = platform_get_drvdata(pdev);
 	if (!sha_dd)

From 47f1241ec320302edb77d117302b1990178b449b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 18 Jul 2017 18:05:50 -0500
Subject: [PATCH 057/116] crypto: atmel-tdes - remove unnecessary static in
 atmel_tdes_remove()

Remove unnecessary static on local variable tdes_dd. Such variable
is initialized before being used, on every execution path throughout
the function. The static has no benefit and, removing it reduces the
object file size.

This issue was detected using Coccinelle and the following semantic patch:
https://github.com/GustavoARSilva/coccinelle/blob/master/static/static_unused.cocci

In the following log you can see a significant difference in the object
file size. This log is the output of the size command, before and after
the code change:

before:
   text    data     bss     dec     hex filename
  17079    8704     128   25911    6537 drivers/crypto/atmel-tdes.o

after:
   text    data     bss     dec     hex filename
  17039    8616      64   25719    6477 drivers/crypto/atmel-tdes.o

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-tdes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index b25f1b3c981f7..f4b335dda5687 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1487,7 +1487,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 
 static int atmel_tdes_remove(struct platform_device *pdev)
 {
-	static struct atmel_tdes_dev *tdes_dd;
+	struct atmel_tdes_dev *tdes_dd;
 
 	tdes_dd = platform_get_drvdata(pdev);
 	if (!tdes_dd)

From 1ce5b2f348c0e016d6195ffb974f7fb98785c90a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 18 Jul 2017 18:07:12 -0500
Subject: [PATCH 058/116] crypto: img-hash - remove unnecessary static in
 img_hash_remove()

Remove unnecessary static on local variable hdev. Such variable
is initialized before being used, on every execution path throughout
the function. The static has no benefit and, removing it reduces the
object file size.

This issue was detected using Coccinelle and the following semantic patch:
https://github.com/GustavoARSilva/coccinelle/blob/master/static/static_unused.cocci

In the following log you can see a significant difference in the object
file size. This log is the output of the size command, before and after
the code change:

before:
   text    data     bss     dec     hex filename
  14842    6464     128   21434    53ba drivers/crypto/img-hash.o

after:
   text    data     bss     dec     hex filename
  14789    6376      64   21229    52ed drivers/crypto/img-hash.o

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/img-hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index 0c6a917a9ab80..b87000a0a01cb 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -1054,7 +1054,7 @@ static int img_hash_probe(struct platform_device *pdev)
 
 static int img_hash_remove(struct platform_device *pdev)
 {
-	static struct img_hash_dev *hdev;
+	struct img_hash_dev *hdev;
 
 	hdev = platform_get_drvdata(pdev);
 	spin_lock(&img_hash.lock);

From 0138d32fed6b30ba1f67be43c87b14b7f2acc1c2 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 19 Jul 2017 10:24:15 +0100
Subject: [PATCH 059/116] Crypto: atmel-ecc: Make a couple of local functions
 static

Functions atmel_ecc_i2c_client_alloc and atmel_ecc_i2c_client_free are
local to the source and no not need to be in the global scope. Make
them static.

Cleans up sparse warnings:
symbol 'atmel_ecc_i2c_client_alloc' was not declared. Should it be static?
symbol 'atmel_ecc_i2c_client_free' was not declared. Should it be static?

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-ecc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 66ab1021eba58..c6e8de2b28dda 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -492,7 +492,7 @@ static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
 	return ret;
 }
 
-struct i2c_client *atmel_ecc_i2c_client_alloc(void)
+static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
 {
 	struct atmel_ecc_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
 	struct i2c_client *client = ERR_PTR(-ENODEV);
@@ -527,7 +527,7 @@ struct i2c_client *atmel_ecc_i2c_client_alloc(void)
 	return client;
 }
 
-void atmel_ecc_i2c_client_free(struct i2c_client *client)
+static void atmel_ecc_i2c_client_free(struct i2c_client *client)
 {
 	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
 

From b66ad0b7aa92e62dc65b56f28cfeb93dbeb82f43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Wed, 19 Jul 2017 19:40:32 +0300
Subject: [PATCH 060/116] crypto: tcrypt - remove AES-XTS-192 speed tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove xts(aes) speed tests with 2 x 192-bit keys, since implementations
adhering strictly to IEEE 1619-2007 standard cannot cope with key sizes
other than 2 x 128, 2 x 256 bits - i.e. AES-XTS-{128,256}:
[...]
tcrypt: test 5 (384 bit key, 16 byte blocks):
caam_jr 8020000.jr: key size mismatch
tcrypt: setkey() failed flags=200000
[...]

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 0dd6a432d6ca9..0022a18d36eeb 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1404,9 +1404,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		test_cipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 				speed_template_32_40_48);
 		test_cipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
-				speed_template_32_48_64);
+				speed_template_32_64);
 		test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
-				speed_template_32_48_64);
+				speed_template_32_64);
 		test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
 		test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
@@ -1837,9 +1837,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		test_acipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 				   speed_template_32_40_48);
 		test_acipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
-				   speed_template_32_48_64);
+				   speed_template_32_64);
 		test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
-				   speed_template_32_48_64);
+				   speed_template_32_64);
 		test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 				   speed_template_16_24_32);
 		test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,

From 0876d16124d41b78f7fdbd96ad42b1d7b13fe620 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Wed, 19 Jul 2017 22:29:08 -0500
Subject: [PATCH 061/116] crypto: ccp - remove duplicate module version and
 author entry

commit 720419f01832 ("crypto: ccp - Introduce the AMD Secure Processor device")
moved the module registeration from ccp-dev.c to sp-dev.c but patch missed
removing the module version and author entry from ccp-dev.c.

It causes the below warning during boot when CONFIG_CRYPTO_DEV_SP_CCP=y
and CONFIG_CRYPTO_DEV_CCP_CRYPTO=y is set.

[ 0.187825] sysfs: cannot create duplicate filename '/module/ccp/version'
[ 0.187825] sysfs: cannot create duplicate filename '/module/ccp/version'

Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Gary R Hook <gary.hook@amd.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 52eb3ed25a267..4e029b1766416 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -11,7 +11,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
@@ -30,12 +29,6 @@
 
 #include "ccp-dev.h"
 
-MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
-MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.1.0");
-MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
-
 struct ccp_tasklet_data {
 	struct completion completion;
 	struct ccp_cmd *cmd;

From 952035baedb15d956d5f25b34287d728c751cac9 Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Thu, 20 Jul 2017 10:37:39 +0300
Subject: [PATCH 062/116] crypto: ecdh - fix concurrency on shared secret and
 pubkey

ecdh_ctx contained static allocated data for the shared secret
and public key.

The shared secret and the public key were doomed to concurrency
issues because they could be shared by multiple crypto requests.

The concurrency is fixed by replacing per-tfm shared secret and
public key with per-request dynamically allocated shared secret
and public key.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ecdh.c | 51 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 61c7708905d01..4271fc77d2616 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -20,8 +20,6 @@ struct ecdh_ctx {
 	unsigned int curve_id;
 	unsigned int ndigits;
 	u64 private_key[ECC_MAX_DIGITS];
-	u64 public_key[2 * ECC_MAX_DIGITS];
-	u64 shared_secret[ECC_MAX_DIGITS];
 };
 
 static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
@@ -70,41 +68,58 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 
 static int ecdh_compute_value(struct kpp_request *req)
 {
-	int ret = 0;
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
-	size_t copied, nbytes;
+	u64 *public_key;
+	u64 *shared_secret = NULL;
 	void *buf;
+	size_t copied, nbytes, public_key_sz;
+	int ret = -ENOMEM;
 
 	nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	/* Public part is a point thus it has both coordinates */
+	public_key_sz = 2 * nbytes;
+
+	public_key = kmalloc(public_key_sz, GFP_KERNEL);
+	if (!public_key)
+		return -ENOMEM;
 
 	if (req->src) {
-		copied = sg_copy_to_buffer(req->src, 1, ctx->public_key,
-					   2 * nbytes);
-		if (copied != 2 * nbytes)
-			return -EINVAL;
+		shared_secret = kmalloc(nbytes, GFP_KERNEL);
+		if (!shared_secret)
+			goto free_pubkey;
+
+		copied = sg_copy_to_buffer(req->src, 1, public_key,
+					   public_key_sz);
+		if (copied != public_key_sz) {
+			ret = -EINVAL;
+			goto free_all;
+		}
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
-						ctx->private_key,
-						ctx->public_key,
-						ctx->shared_secret);
+						ctx->private_key, public_key,
+						shared_secret);
 
-		buf = ctx->shared_secret;
+		buf = shared_secret;
 	} else {
 		ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
-				       ctx->private_key, ctx->public_key);
-		buf = ctx->public_key;
-		/* Public part is a point thus it has both coordinates */
-		nbytes *= 2;
+				       ctx->private_key, public_key);
+		buf = public_key;
+		nbytes = public_key_sz;
 	}
 
 	if (ret < 0)
-		return ret;
+		goto free_all;
 
 	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
 	if (copied != nbytes)
-		return -EINVAL;
+		ret = -EINVAL;
 
+	/* fall through */
+free_all:
+	kzfree(shared_secret);
+free_pubkey:
+	kfree(public_key);
 	return ret;
 }
 

From 6d2bce6a15753710836f99054826d5ae638dd04a Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Thu, 20 Jul 2017 16:35:49 +0300
Subject: [PATCH 063/116] crypto: atmel-ecc - fix signed integer to u8
 assignment

static checker warning:
        drivers/crypto/atmel-ecc.c:281 atmel_ecdh_done()
        warn: assigning (-22) to unsigned variable 'status'

Similar warning can be raised in atmel_ecc_work_handler()
when atmel_ecc_send_receive() returns an error. Fix this too.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-ecc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index c6e8de2b28dda..e66f18a0ddd03 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -99,7 +99,7 @@ struct atmel_ecdh_ctx {
  *        @work_data: data structure representing the work
  *        @areq     : optional pointer to an argument passed with the original
  *                    request.
- *        @status   : status returned from the device.
+ *        @status   : status returned from the i2c client device or i2c error.
  * @areq: optional pointer to a user argument for use at callback time.
  * @work: describes the task to be executed.
  * @cmd : structure used for communicating with the device.
@@ -107,7 +107,7 @@ struct atmel_ecdh_ctx {
 struct atmel_ecc_work_data {
 	struct atmel_ecdh_ctx *ctx;
 	void (*cbk)(struct atmel_ecc_work_data *work_data, void *areq,
-		    u8 status);
+		    int status);
 	void *areq;
 	struct work_struct work;
 	struct atmel_ecc_cmd cmd;
@@ -263,7 +263,7 @@ static int atmel_ecc_sleep(struct i2c_client *client)
 }
 
 static void atmel_ecdh_done(struct atmel_ecc_work_data *work_data, void *areq,
-			    u8 status)
+			    int status)
 {
 	struct kpp_request *req = areq;
 	struct atmel_ecdh_ctx *ctx = work_data->ctx;
@@ -344,7 +344,7 @@ static void atmel_ecc_work_handler(struct work_struct *work)
 			container_of(work, struct atmel_ecc_work_data, work);
 	struct atmel_ecc_cmd *cmd = &work_data->cmd;
 	struct i2c_client *client = work_data->ctx->client;
-	u8 status;
+	int status;
 
 	status = atmel_ecc_send_receive(client, cmd);
 	work_data->cbk(work_data, work_data->areq, status);
@@ -352,7 +352,7 @@ static void atmel_ecc_work_handler(struct work_struct *work)
 
 static void atmel_ecc_enqueue(struct atmel_ecc_work_data *work_data,
 			      void (*cbk)(struct atmel_ecc_work_data *work_data,
-					  void *areq, u8 status),
+					  void *areq, int status),
 			      void *areq)
 {
 	work_data->cbk = (void *)cbk;

From 9166c44358346c0a92b11fd4e24925efff791648 Mon Sep 17 00:00:00 2001
From: raveendra padasalagi <raveendra.padasalagi@broadcom.com>
Date: Fri, 21 Jul 2017 11:17:39 +0530
Subject: [PATCH 064/116] crypto: brcm - Support more FlexRM rings than SPU
 engines.

Enhance code to generically support cases where DMA rings
are greater than or equal to number of SPU engines.
New hardware has underlying DMA engine-FlexRM with 32 rings
which can be used to communicate to any of the available
10 SPU engines.

Signed-off-by: Raveendra Padasalagi <raveendra.padasalagi@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/bcm/cipher.c | 109 ++++++++++++++++--------------------
 drivers/crypto/bcm/cipher.h |  13 +++--
 2 files changed, 56 insertions(+), 66 deletions(-)

diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index f1a826f97fec6..8685c7e4debdc 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -90,8 +90,6 @@ static int aead_pri = 150;
 module_param(aead_pri, int, 0644);
 MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos");
 
-#define MAX_SPUS 16
-
 /* A type 3 BCM header, expected to precede the SPU header for SPU-M.
  * Bits 3 and 4 in the first byte encode the channel number (the dma ringset).
  * 0x60 - ring 0
@@ -120,7 +118,7 @@ static u8 select_channel(void)
 {
 	u8 chan_idx = atomic_inc_return(&iproc_priv.next_chan);
 
-	return chan_idx % iproc_priv.spu.num_spu;
+	return chan_idx % iproc_priv.spu.num_chan;
 }
 
 /**
@@ -4528,8 +4526,13 @@ static void spu_functions_register(struct device *dev,
  */
 static int spu_mb_init(struct device *dev)
 {
-	struct mbox_client *mcl = &iproc_priv.mcl[iproc_priv.spu.num_spu];
-	int err;
+	struct mbox_client *mcl = &iproc_priv.mcl;
+	int err, i;
+
+	iproc_priv.mbox = devm_kcalloc(dev, iproc_priv.spu.num_chan,
+				  sizeof(struct mbox_chan *), GFP_KERNEL);
+	if (!iproc_priv.mbox)
+		return -ENOMEM;
 
 	mcl->dev = dev;
 	mcl->tx_block = false;
@@ -4538,25 +4541,33 @@ static int spu_mb_init(struct device *dev)
 	mcl->rx_callback = spu_rx_callback;
 	mcl->tx_done = NULL;
 
-	iproc_priv.mbox[iproc_priv.spu.num_spu] =
-			mbox_request_channel(mcl, 0);
-	if (IS_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu])) {
-		err = (int)PTR_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu]);
-		dev_err(dev,
-			"Mbox channel %d request failed with err %d",
-			iproc_priv.spu.num_spu, err);
-		iproc_priv.mbox[iproc_priv.spu.num_spu] = NULL;
-		return err;
+	for (i = 0; i < iproc_priv.spu.num_chan; i++) {
+		iproc_priv.mbox[i] = mbox_request_channel(mcl, i);
+		if (IS_ERR(iproc_priv.mbox[i])) {
+			err = (int)PTR_ERR(iproc_priv.mbox[i]);
+			dev_err(dev,
+				"Mbox channel %d request failed with err %d",
+				i, err);
+			iproc_priv.mbox[i] = NULL;
+			goto free_channels;
+		}
 	}
 
 	return 0;
+free_channels:
+	for (i = 0; i < iproc_priv.spu.num_chan; i++) {
+		if (iproc_priv.mbox[i])
+			mbox_free_channel(iproc_priv.mbox[i]);
+	}
+
+	return err;
 }
 
 static void spu_mb_release(struct platform_device *pdev)
 {
 	int i;
 
-	for (i = 0; i < iproc_priv.spu.num_spu; i++)
+	for (i = 0; i < iproc_priv.spu.num_chan; i++)
 		mbox_free_channel(iproc_priv.mbox[i]);
 }
 
@@ -4567,7 +4578,7 @@ static void spu_counters_init(void)
 
 	atomic_set(&iproc_priv.session_count, 0);
 	atomic_set(&iproc_priv.stream_count, 0);
-	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_spu);
+	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_chan);
 	atomic64_set(&iproc_priv.bytes_in, 0);
 	atomic64_set(&iproc_priv.bytes_out, 0);
 	for (i = 0; i < SPU_OP_NUM; i++) {
@@ -4809,8 +4820,11 @@ static int spu_dt_read(struct platform_device *pdev)
 	struct resource *spu_ctrl_regs;
 	const struct of_device_id *match;
 	const struct spu_type_subtype *matched_spu_type;
-	void __iomem *spu_reg_vbase[MAX_SPUS];
-	int err;
+	struct device_node *dn = pdev->dev.of_node;
+	int err, i;
+
+	/* Count number of mailbox channels */
+	spu->num_chan = of_count_phandle_with_args(dn, "mboxes", "#mbox-cells");
 
 	match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev);
 	if (!match) {
@@ -4820,41 +4834,24 @@ static int spu_dt_read(struct platform_device *pdev)
 
 	matched_spu_type = match->data;
 
-	if (iproc_priv.spu.num_spu > 1) {
-		/* If this is 2nd or later SPU, make sure it's same type */
-		if ((spu->spu_type != matched_spu_type->type) ||
-		    (spu->spu_subtype != matched_spu_type->subtype)) {
-			err = -EINVAL;
-			dev_err(&pdev->dev, "Multiple SPU types not allowed");
-			return err;
-		}
-	} else {
-		/* Record type of first SPU */
-		spu->spu_type = matched_spu_type->type;
-		spu->spu_subtype = matched_spu_type->subtype;
-	}
+	spu->spu_type = matched_spu_type->type;
+	spu->spu_subtype = matched_spu_type->subtype;
 
-	/* Get and map SPU registers */
-	spu_ctrl_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!spu_ctrl_regs) {
-		err = -EINVAL;
-		dev_err(&pdev->dev, "Invalid/missing registers for SPU\n");
-		return err;
-	}
+	i = 0;
+	for (i = 0; (i < MAX_SPUS) && ((spu_ctrl_regs =
+		platform_get_resource(pdev, IORESOURCE_MEM, i)) != NULL); i++) {
 
-	spu_reg_vbase[iproc_priv.spu.num_spu] =
-				devm_ioremap_resource(dev, spu_ctrl_regs);
-	if (IS_ERR(spu_reg_vbase[iproc_priv.spu.num_spu])) {
-		err = PTR_ERR(spu_reg_vbase[iproc_priv.spu.num_spu]);
-		dev_err(&pdev->dev, "Failed to map registers: %d\n",
-			err);
-		spu_reg_vbase[iproc_priv.spu.num_spu] = NULL;
-		return err;
+		spu->reg_vbase[i] = devm_ioremap_resource(dev, spu_ctrl_regs);
+		if (IS_ERR(spu->reg_vbase[i])) {
+			err = PTR_ERR(spu->reg_vbase[i]);
+			dev_err(&pdev->dev, "Failed to map registers: %d\n",
+				err);
+			spu->reg_vbase[i] = NULL;
+			return err;
+		}
 	}
-
-	dev_dbg(dev, "SPU %d detected.", iproc_priv.spu.num_spu);
-
-	spu->reg_vbase[iproc_priv.spu.num_spu] = spu_reg_vbase;
+	spu->num_spu = i;
+	dev_dbg(dev, "Device has %d SPUs", spu->num_spu);
 
 	return 0;
 }
@@ -4865,8 +4862,8 @@ int bcm_spu_probe(struct platform_device *pdev)
 	struct spu_hw *spu = &iproc_priv.spu;
 	int err = 0;
 
-	iproc_priv.pdev[iproc_priv.spu.num_spu] = pdev;
-	platform_set_drvdata(iproc_priv.pdev[iproc_priv.spu.num_spu],
+	iproc_priv.pdev  = pdev;
+	platform_set_drvdata(iproc_priv.pdev,
 			     &iproc_priv);
 
 	err = spu_dt_read(pdev);
@@ -4877,12 +4874,6 @@ int bcm_spu_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto failure;
 
-	iproc_priv.spu.num_spu++;
-
-	/* If already initialized, we've just added another SPU and are done */
-	if (iproc_priv.inited)
-		return 0;
-
 	if (spu->spu_type == SPU_TYPE_SPUM)
 		iproc_priv.bcm_hdr_len = 8;
 	else if (spu->spu_type == SPU_TYPE_SPU2)
@@ -4898,8 +4889,6 @@ int bcm_spu_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto fail_reg;
 
-	iproc_priv.inited = true;
-
 	return 0;
 
 fail_reg:
diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h
index 51dca529ce8f2..57a55eb2a2552 100644
--- a/drivers/crypto/bcm/cipher.h
+++ b/drivers/crypto/bcm/cipher.h
@@ -427,10 +427,13 @@ struct spu_hw {
 
 	/* The number of SPUs on this platform */
 	u32 num_spu;
+
+	/* The number of SPU channels on this platform */
+	u32 num_chan;
 };
 
 struct device_private {
-	struct platform_device *pdev[MAX_SPUS];
+	struct platform_device *pdev;
 
 	struct spu_hw spu;
 
@@ -470,12 +473,10 @@ struct device_private {
 	/* Number of ICV check failures for AEAD messages */
 	atomic_t bad_icv;
 
-	struct mbox_client mcl[MAX_SPUS];
-	/* Array of mailbox channel pointers, one for each channel */
-	struct mbox_chan *mbox[MAX_SPUS];
+	struct mbox_client mcl;
 
-	/* Driver initialized */
-	bool inited;
+	/* Array of mailbox channel pointers, one for each channel */
+	struct mbox_chan **mbox;
 };
 
 extern struct device_private iproc_priv;

From 3c08377262880afc1621ab9cb6dbe7df47a6033d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 21 Jul 2017 16:42:36 +0100
Subject: [PATCH 065/116] crypto: scompress - don't sleep with preemption
 disabled

Due to the use of per-CPU buffers, scomp_acomp_comp_decomp() executes
with preemption disabled, and so whether the CRYPTO_TFM_REQ_MAY_SLEEP
flag is set is irrelevant, since we cannot sleep anyway. So disregard
the flag, and use GFP_ATOMIC unconditionally.

Cc: <stable@vger.kernel.org> # v4.10+
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/scompress.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/crypto/scompress.c b/crypto/scompress.c
index ae1d3cf209e48..0b40d991d65f6 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -211,9 +211,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 					      scratch_dst, &req->dlen, *ctx);
 	if (!ret) {
 		if (!req->dst) {
-			req->dst = crypto_scomp_sg_alloc(req->dlen,
-				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
-				   GFP_KERNEL : GFP_ATOMIC);
+			req->dst = crypto_scomp_sg_alloc(req->dlen, GFP_ATOMIC);
 			if (!req->dst)
 				goto out;
 		}

From cc4d110ec824d3f05f95b1f705158afc6fb08773 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 21 Jul 2017 16:42:37 +0100
Subject: [PATCH 066/116] crypto: scompress - free partially allocated scratch
 buffers on failure

When allocating the per-CPU scratch buffers, we allocate the source
and destination buffers separately, but bail immediately if the second
allocation fails, without freeing the first one. Fix that.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/scompress.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crypto/scompress.c b/crypto/scompress.c
index 0b40d991d65f6..2c07648305adc 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -125,8 +125,11 @@ static int crypto_scomp_alloc_all_scratches(void)
 		if (!scomp_src_scratches)
 			return -ENOMEM;
 		scomp_dst_scratches = crypto_scomp_alloc_scratches();
-		if (!scomp_dst_scratches)
+		if (!scomp_dst_scratches) {
+			crypto_scomp_free_scratches(scomp_src_scratches);
+			scomp_src_scratches = NULL;
 			return -ENOMEM;
+		}
 	}
 	return 0;
 }

From 6a8487a1f29fa2801a530200c6cdf81592b1a4d7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 21 Jul 2017 16:42:38 +0100
Subject: [PATCH 067/116] crypto: scompress - defer allocation of scratch
 buffer to first use

The scompress code allocates 2 x 128 KB of scratch buffers for each CPU,
so that clients of the async API can use synchronous implementations
even from atomic context. However, on systems such as Cavium Thunderx
(which has 96 cores), this adds up to a non-negligible 24 MB. Also,
32-bit systems may prefer to use their precious vmalloc space for other
things,especially since there don't appear to be any clients for the
async compression API yet.

So let's defer allocation of the scratch buffers until the first time
we allocate an acompress cipher based on an scompress implementation.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/scompress.c | 46 +++++++++++++++++-----------------------------
 1 file changed, 17 insertions(+), 29 deletions(-)

diff --git a/crypto/scompress.c b/crypto/scompress.c
index 2c07648305adc..2075e2c4e7dfd 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -65,11 +65,6 @@ static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
 	seq_puts(m, "type         : scomp\n");
 }
 
-static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
-{
-	return 0;
-}
-
 static void crypto_scomp_free_scratches(void * __percpu *scratches)
 {
 	int i;
@@ -134,6 +129,17 @@ static int crypto_scomp_alloc_all_scratches(void)
 	return 0;
 }
 
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+	int ret;
+
+	mutex_lock(&scomp_lock);
+	ret = crypto_scomp_alloc_all_scratches();
+	mutex_unlock(&scomp_lock);
+
+	return ret;
+}
+
 static void crypto_scomp_sg_free(struct scatterlist *sgl)
 {
 	int i, n;
@@ -241,6 +247,10 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
 	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
 
 	crypto_free_scomp(*ctx);
+
+	mutex_lock(&scomp_lock);
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
 }
 
 int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
@@ -317,40 +327,18 @@ static const struct crypto_type crypto_scomp_type = {
 int crypto_register_scomp(struct scomp_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
-	int ret = -ENOMEM;
-
-	mutex_lock(&scomp_lock);
-	if (crypto_scomp_alloc_all_scratches())
-		goto error;
 
 	base->cra_type = &crypto_scomp_type;
 	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
 	base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
 
-	ret = crypto_register_alg(base);
-	if (ret)
-		goto error;
-
-	mutex_unlock(&scomp_lock);
-	return ret;
-
-error:
-	crypto_scomp_free_all_scratches();
-	mutex_unlock(&scomp_lock);
-	return ret;
+	return crypto_register_alg(base);
 }
 EXPORT_SYMBOL_GPL(crypto_register_scomp);
 
 int crypto_unregister_scomp(struct scomp_alg *alg)
 {
-	int ret;
-
-	mutex_lock(&scomp_lock);
-	ret = crypto_unregister_alg(&alg->base);
-	crypto_scomp_free_all_scratches();
-	mutex_unlock(&scomp_lock);
-
-	return ret;
+	return crypto_unregister_alg(&alg->base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
 

From 60f3aaee7343b15f86bcfbb02384206d8a1900cd Mon Sep 17 00:00:00 2001
From: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Date: Fri, 21 Jul 2017 22:55:56 +0530
Subject: [PATCH 068/116] hwrng: Kconfig - Correct help text about feeding
 entropy pool

Modify Kconfig help text to reflect the fact that random data from hwrng
is fed into kernel random number generator's entropy pool.

Signed-off-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 1b223c32a8ae7..2f45e1532670e 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -13,10 +13,8 @@ menuconfig HW_RANDOM
 	  that's usually called /dev/hwrng, and which exposes one
 	  of possibly several hardware random number generators.
 
-	  These hardware random number generators do not feed directly
-	  into the kernel's random number generator.  That is usually
-	  handled by the "rngd" daemon.  Documentation/hw_random.txt
-	  has more information.
+	  These hardware random number generators do feed into the
+	  kernel's random number generator entropy pool.
 
 	  If unsure, say Y.
 

From 15d05fd4ed06a2f34edd3cf69ab27c2a47e3b52e Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Sun, 23 Jul 2017 19:49:04 +0200
Subject: [PATCH 069/116] Documentation: devicetree: add Freescale RNGC binding

Add binding documentation for the Freescale RNGC found on
some i.MX2/3 SoCs.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../devicetree/bindings/rng/imx-rngc.txt      | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/rng/imx-rngc.txt

diff --git a/Documentation/devicetree/bindings/rng/imx-rngc.txt b/Documentation/devicetree/bindings/rng/imx-rngc.txt
new file mode 100644
index 0000000000000..93c7174a7bedc
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/imx-rngc.txt
@@ -0,0 +1,21 @@
+Freescale RNGC (Random Number Generator Version C)
+
+The driver also supports version B, which is mostly compatible
+to version C.
+
+Required properties:
+- compatible : should be one of
+               "fsl,imx25-rngb"
+               "fsl,imx35-rngc"
+- reg : offset and length of the register set of this block
+- interrupts : the interrupt number for the RNGC block
+- clocks : the RNGC clk source
+
+Example:
+
+rng@53fb0000 {
+	compatible = "fsl,imx25-rngb";
+	reg = <0x53fb0000 0x4000>;
+	interrupts = <22>;
+	clocks = <&trng_clk>;
+};

From 1d5449445bd0afbc003518543873c7198a09728b Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 23 Jul 2017 19:49:06 +0200
Subject: [PATCH 070/116] hwrng: mx-rngc - add a driver for Freescale RNGC

The driver is ported from Freescale's Linux git and can be
found in the

	vendor/freescale/imx_2.6.35_maintain

branch.

The driver supports both RNG version C that's part of some Freescale
i.MX3 SoCs and version B that is available on i.MX2x chipsets.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Reviewed-by: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig    |  14 ++
 drivers/char/hw_random/Makefile   |   1 +
 drivers/char/hw_random/imx-rngc.c | 331 ++++++++++++++++++++++++++++++
 3 files changed, 346 insertions(+)
 create mode 100644 drivers/char/hw_random/imx-rngc.c

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 2f45e1532670e..95a031e9eced0 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -253,6 +253,20 @@ config HW_RANDOM_MXC_RNGA
 
 	  If unsure, say Y.
 
+config HW_RANDOM_IMX_RNGC
+	tristate "Freescale i.MX RNGC Random Number Generator"
+	depends on ARCH_MXC
+	default HW_RANDOM
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator Version C hardware found on some Freescale i.MX
+	  processors. Version B is also supported by this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called imx-rngc.
+
+	  If unsure, say Y.
+
 config HW_RANDOM_NOMADIK
 	tristate "ST-Ericsson Nomadik Random Number Generator support"
 	depends on ARCH_NOMADIK
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index b085975ec1d2f..39a67defac67c 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o
 obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
 obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
 obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
+obj-$(CONFIG_HW_RANDOM_IMX_RNGC) += imx-rngc.o
 obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
 obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
new file mode 100644
index 0000000000000..88db42d307606
--- /dev/null
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -0,0 +1,331 @@
+/*
+ * RNG driver for Freescale RNGC
+ *
+ * Copyright (C) 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright (C) 2017 Martin Kaiser <martin@kaiser.cx>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/hw_random.h>
+#include <linux/completion.h>
+#include <linux/io.h>
+
+#define RNGC_COMMAND			0x0004
+#define RNGC_CONTROL			0x0008
+#define RNGC_STATUS			0x000C
+#define RNGC_ERROR			0x0010
+#define RNGC_FIFO			0x0014
+
+#define RNGC_CMD_CLR_ERR		0x00000020
+#define RNGC_CMD_CLR_INT		0x00000010
+#define RNGC_CMD_SEED			0x00000002
+#define RNGC_CMD_SELF_TEST		0x00000001
+
+#define RNGC_CTRL_MASK_ERROR		0x00000040
+#define RNGC_CTRL_MASK_DONE		0x00000020
+
+#define RNGC_STATUS_ERROR		0x00010000
+#define RNGC_STATUS_FIFO_LEVEL_MASK	0x00000f00
+#define RNGC_STATUS_FIFO_LEVEL_SHIFT	8
+#define RNGC_STATUS_SEED_DONE		0x00000020
+#define RNGC_STATUS_ST_DONE		0x00000010
+
+#define RNGC_ERROR_STATUS_STAT_ERR	0x00000008
+
+#define RNGC_TIMEOUT  3000 /* 3 sec */
+
+
+static bool self_test = true;
+module_param(self_test, bool, 0);
+
+struct imx_rngc {
+	struct device		*dev;
+	struct clk		*clk;
+	void __iomem		*base;
+	struct hwrng		rng;
+	struct completion	rng_op_done;
+	/*
+	 * err_reg is written only by the irq handler and read only
+	 * when interrupts are masked, we need no spinlock
+	 */
+	u32			err_reg;
+};
+
+
+static inline void imx_rngc_irq_mask_clear(struct imx_rngc *rngc)
+{
+	u32 ctrl, cmd;
+
+	/* mask interrupts */
+	ctrl = readl(rngc->base + RNGC_CONTROL);
+	ctrl |= RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR;
+	writel(ctrl, rngc->base + RNGC_CONTROL);
+
+	/*
+	 * CLR_INT clears the interrupt only if there's no error
+	 * CLR_ERR clear the interrupt and the error register if there
+	 * is an error
+	 */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	cmd |= RNGC_CMD_CLR_INT | RNGC_CMD_CLR_ERR;
+	writel(cmd, rngc->base + RNGC_COMMAND);
+}
+
+static inline void imx_rngc_irq_unmask(struct imx_rngc *rngc)
+{
+	u32 ctrl;
+
+	ctrl = readl(rngc->base + RNGC_CONTROL);
+	ctrl &= ~(RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR);
+	writel(ctrl, rngc->base + RNGC_CONTROL);
+}
+
+static int imx_rngc_self_test(struct imx_rngc *rngc)
+{
+	u32 cmd;
+	int ret;
+
+	imx_rngc_irq_unmask(rngc);
+
+	/* run self test */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND);
+
+	ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT);
+	if (!ret) {
+		imx_rngc_irq_mask_clear(rngc);
+		return -ETIMEDOUT;
+	}
+
+	if (rngc->err_reg != 0)
+		return -EIO;
+
+	return 0;
+}
+
+static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+	unsigned int status;
+	unsigned int level;
+	int retval = 0;
+
+	while (max >= sizeof(u32)) {
+		status = readl(rngc->base + RNGC_STATUS);
+
+		/* is there some error while reading this random number? */
+		if (status & RNGC_STATUS_ERROR)
+			break;
+
+		/* how many random numbers are in FIFO? [0-16] */
+		level = (status & RNGC_STATUS_FIFO_LEVEL_MASK) >>
+			RNGC_STATUS_FIFO_LEVEL_SHIFT;
+
+		if (level) {
+			/* retrieve a random number from FIFO */
+			*(u32 *)data = readl(rngc->base + RNGC_FIFO);
+
+			retval += sizeof(u32);
+			data += sizeof(u32);
+			max -= sizeof(u32);
+		}
+	}
+
+	return retval ? retval : -EIO;
+}
+
+static irqreturn_t imx_rngc_irq(int irq, void *priv)
+{
+	struct imx_rngc *rngc = (struct imx_rngc *)priv;
+	u32 status;
+
+	/*
+	 * clearing the interrupt will also clear the error register
+	 * read error and status before clearing
+	 */
+	status = readl(rngc->base + RNGC_STATUS);
+	rngc->err_reg = readl(rngc->base + RNGC_ERROR);
+
+	imx_rngc_irq_mask_clear(rngc);
+
+	if (status & (RNGC_STATUS_SEED_DONE | RNGC_STATUS_ST_DONE))
+		complete(&rngc->rng_op_done);
+
+	return IRQ_HANDLED;
+}
+
+static int imx_rngc_init(struct hwrng *rng)
+{
+	struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+	u32 cmd;
+	int ret;
+
+	/* clear error */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND);
+
+	/* create seed, repeat while there is some statistical error */
+	do {
+		imx_rngc_irq_unmask(rngc);
+
+		/* seed creation */
+		cmd = readl(rngc->base + RNGC_COMMAND);
+		writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND);
+
+		ret = wait_for_completion_timeout(&rngc->rng_op_done,
+				RNGC_TIMEOUT);
+
+		if (!ret) {
+			imx_rngc_irq_mask_clear(rngc);
+			return -ETIMEDOUT;
+		}
+
+	} while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR);
+
+	return rngc->err_reg ? -EIO : 0;
+}
+
+static int imx_rngc_probe(struct platform_device *pdev)
+{
+	struct imx_rngc *rngc;
+	struct resource *res;
+	int ret;
+	int irq;
+
+	rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL);
+	if (!rngc)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rngc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(rngc->base))
+		return PTR_ERR(rngc->base);
+
+	rngc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rngc->clk)) {
+		dev_err(&pdev->dev, "Can not get rng_clk\n");
+		return PTR_ERR(rngc->clk);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "Couldn't get irq %d\n", irq);
+		return irq;
+	}
+
+	ret = clk_prepare_enable(rngc->clk);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(&pdev->dev,
+			irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
+	if (ret) {
+		dev_err(rngc->dev, "Can't get interrupt working.\n");
+		goto err;
+	}
+
+	init_completion(&rngc->rng_op_done);
+
+	rngc->rng.name = pdev->name;
+	rngc->rng.init = imx_rngc_init;
+	rngc->rng.read = imx_rngc_read;
+
+	rngc->dev = &pdev->dev;
+	platform_set_drvdata(pdev, rngc);
+
+	imx_rngc_irq_mask_clear(rngc);
+
+	if (self_test) {
+		ret = imx_rngc_self_test(rngc);
+		if (ret) {
+			dev_err(rngc->dev, "FSL RNGC self test failed.\n");
+			goto err;
+		}
+	}
+
+	ret = hwrng_register(&rngc->rng);
+	if (ret) {
+		dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret);
+		goto err;
+	}
+
+	dev_info(&pdev->dev, "Freescale RNGC registered.\n");
+	return 0;
+
+err:
+	clk_disable_unprepare(rngc->clk);
+
+	return ret;
+}
+
+static int __exit imx_rngc_remove(struct platform_device *pdev)
+{
+	struct imx_rngc *rngc = platform_get_drvdata(pdev);
+
+	hwrng_unregister(&rngc->rng);
+
+	clk_disable_unprepare(rngc->clk);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int imx_rngc_suspend(struct device *dev)
+{
+	struct imx_rngc *rngc = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(rngc->clk);
+
+	return 0;
+}
+
+static int imx_rngc_resume(struct device *dev)
+{
+	struct imx_rngc *rngc = dev_get_drvdata(dev);
+
+	clk_prepare_enable(rngc->clk);
+
+	return 0;
+}
+
+static const struct dev_pm_ops imx_rngc_pm_ops = {
+	.suspend	= imx_rngc_suspend,
+	.resume		= imx_rngc_resume,
+};
+#endif
+
+static const struct of_device_id imx_rngc_dt_ids[] = {
+	{ .compatible = "fsl,imx25-rngb", .data = NULL, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_rngc_dt_ids);
+
+static struct platform_driver imx_rngc_driver = {
+	.driver = {
+		.name = "imx_rngc",
+#ifdef CONFIG_PM
+		.pm = &imx_rngc_pm_ops,
+#endif
+		.of_match_table = imx_rngc_dt_ids,
+	},
+	.remove = __exit_p(imx_rngc_remove),
+};
+
+module_platform_driver_probe(imx_rngc_driver, imx_rngc_probe);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("H/W RNGC driver for i.MX");
+MODULE_LICENSE("GPL");

From 641eacd15696a65b08880985701c8082872da136 Mon Sep 17 00:00:00 2001
From: Zain Wang <wzz@rock-chips.com>
Date: Mon, 24 Jul 2017 09:23:13 +0800
Subject: [PATCH 071/116] crypto: rockchip - move the crypto completion from
 interrupt context

It's illegal to call the completion function from hardirq context,
it will cause runtime tests to fail. Let's build a new task (done_task)
for moving update operation from hardirq context.

Signed-off-by: zain wang <wzz@rock-chips.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 39 +++++++++++++------
 drivers/crypto/rockchip/rk3288_crypto.h       |  4 +-
 .../rockchip/rk3288_crypto_ablkcipher.c       |  2 +-
 drivers/crypto/rockchip/rk3288_crypto_ahash.c |  2 +-
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index d0f80c6241f95..c2b1dd70f9f72 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -169,24 +169,22 @@ static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
 {
 	struct rk_crypto_info *dev  = platform_get_drvdata(dev_id);
 	u32 interrupt_status;
-	int err = 0;
 
 	spin_lock(&dev->lock);
 	interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS);
 	CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status);
+
 	if (interrupt_status & 0x0a) {
 		dev_warn(dev->dev, "DMA Error\n");
-		err = -EFAULT;
-	} else if (interrupt_status & 0x05) {
-		err = dev->update(dev);
+		dev->err = -EFAULT;
 	}
-	if (err)
-		dev->complete(dev, err);
+	tasklet_schedule(&dev->done_task);
+
 	spin_unlock(&dev->lock);
 	return IRQ_HANDLED;
 }
 
-static void rk_crypto_tasklet_cb(unsigned long data)
+static void rk_crypto_queue_task_cb(unsigned long data)
 {
 	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
 	struct crypto_async_request *async_req, *backlog;
@@ -210,11 +208,26 @@ static void rk_crypto_tasklet_cb(unsigned long data)
 		dev->ablk_req = ablkcipher_request_cast(async_req);
 	else
 		dev->ahash_req = ahash_request_cast(async_req);
+	dev->err = 0;
 	err = dev->start(dev);
 	if (err)
 		dev->complete(dev, err);
 }
 
+static void rk_crypto_done_task_cb(unsigned long data)
+{
+	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
+
+	if (dev->err) {
+		dev->complete(dev, dev->err);
+		return;
+	}
+
+	dev->err = dev->update(dev);
+	if (dev->err)
+		dev->complete(dev, dev->err);
+}
+
 static struct rk_crypto_tmp *rk_cipher_algs[] = {
 	&rk_ecb_aes_alg,
 	&rk_cbc_aes_alg,
@@ -361,8 +374,10 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	crypto_info->dev = &pdev->dev;
 	platform_set_drvdata(pdev, crypto_info);
 
-	tasklet_init(&crypto_info->crypto_tasklet,
-		     rk_crypto_tasklet_cb, (unsigned long)crypto_info);
+	tasklet_init(&crypto_info->queue_task,
+		     rk_crypto_queue_task_cb, (unsigned long)crypto_info);
+	tasklet_init(&crypto_info->done_task,
+		     rk_crypto_done_task_cb, (unsigned long)crypto_info);
 	crypto_init_queue(&crypto_info->queue, 50);
 
 	crypto_info->enable_clk = rk_crypto_enable_clk;
@@ -380,7 +395,8 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	return 0;
 
 err_register_alg:
-	tasklet_kill(&crypto_info->crypto_tasklet);
+	tasklet_kill(&crypto_info->queue_task);
+	tasklet_kill(&crypto_info->done_task);
 err_crypto:
 	return err;
 }
@@ -390,7 +406,8 @@ static int rk_crypto_remove(struct platform_device *pdev)
 	struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev);
 
 	rk_crypto_unregister();
-	tasklet_kill(&crypto_tmp->crypto_tasklet);
+	tasklet_kill(&crypto_tmp->done_task);
+	tasklet_kill(&crypto_tmp->queue_task);
 	return 0;
 }
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index d7b71fea320b1..65ad1c2619490 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -190,9 +190,11 @@ struct rk_crypto_info {
 	void __iomem			*reg;
 	int				irq;
 	struct crypto_queue		queue;
-	struct tasklet_struct		crypto_tasklet;
+	struct tasklet_struct		queue_task;
+	struct tasklet_struct		done_task;
 	struct ablkcipher_request	*ablk_req;
 	struct ahash_request		*ahash_req;
+	int 				err;
 	/* device lock */
 	spinlock_t			lock;
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
index b5a3afe222e42..8787e44593f85 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
@@ -42,7 +42,7 @@ static int rk_handle_req(struct rk_crypto_info *dev,
 	spin_lock_irqsave(&dev->lock, flags);
 	err = ablkcipher_enqueue_request(&dev->queue, req);
 	spin_unlock_irqrestore(&dev->lock, flags);
-	tasklet_schedule(&dev->crypto_tasklet);
+	tasklet_schedule(&dev->queue_task);
 	return err;
 }
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 718588219f75a..9b55585a20fa0 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -204,7 +204,7 @@ static int rk_ahash_digest(struct ahash_request *req)
 	ret = crypto_enqueue_request(&dev->queue, &req->base);
 	spin_unlock_irqrestore(&dev->lock, flags);
 
-	tasklet_schedule(&dev->crypto_tasklet);
+	tasklet_schedule(&dev->queue_task);
 
 	/*
 	 * it will take some time to process date after last dma transmission.

From 9a42e4eed3fcd7ba8dff6622384cd08bfe5ef707 Mon Sep 17 00:00:00 2001
From: Zain Wang <wzz@rock-chips.com>
Date: Mon, 24 Jul 2017 09:23:14 +0800
Subject: [PATCH 072/116] crypto: rockchip - return the err code when unable
 dequeue the crypto request

Sometime we would unable to dequeue the crypto request, in this case,
we should finish crypto and return the err code.

Signed-off-by: zain wang <wzz@rock-chips.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 19 -------------------
 .../rockchip/rk3288_crypto_ablkcipher.c       | 15 +++++++++++++++
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 14 ++++++++++++++
 3 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index c2b1dd70f9f72..57c37831bd424 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -187,27 +187,8 @@ static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
 static void rk_crypto_queue_task_cb(unsigned long data)
 {
 	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
-	struct crypto_async_request *async_req, *backlog;
-	unsigned long flags;
 	int err = 0;
 
-	spin_lock_irqsave(&dev->lock, flags);
-	backlog   = crypto_get_backlog(&dev->queue);
-	async_req = crypto_dequeue_request(&dev->queue);
-	spin_unlock_irqrestore(&dev->lock, flags);
-	if (!async_req) {
-		dev_err(dev->dev, "async_req is NULL !!\n");
-		return;
-	}
-	if (backlog) {
-		backlog->complete(backlog, -EINPROGRESS);
-		backlog = NULL;
-	}
-
-	if (crypto_tfm_alg_type(async_req->tfm) == CRYPTO_ALG_TYPE_ABLKCIPHER)
-		dev->ablk_req = ablkcipher_request_cast(async_req);
-	else
-		dev->ahash_req = ahash_request_cast(async_req);
 	dev->err = 0;
 	err = dev->start(dev);
 	if (err)
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
index 8787e44593f85..dbe78def7b650 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
@@ -25,6 +25,7 @@ static int rk_handle_req(struct rk_crypto_info *dev,
 			 struct ablkcipher_request *req)
 {
 	unsigned long flags;
+	struct crypto_async_request *async_req, *backlog;
 	int err;
 
 	if (!IS_ALIGNED(req->nbytes, dev->align_size))
@@ -41,7 +42,21 @@ static int rk_handle_req(struct rk_crypto_info *dev,
 
 	spin_lock_irqsave(&dev->lock, flags);
 	err = ablkcipher_enqueue_request(&dev->queue, req);
+	backlog   = crypto_get_backlog(&dev->queue);
+	async_req = crypto_dequeue_request(&dev->queue);
 	spin_unlock_irqrestore(&dev->lock, flags);
+
+	if (!async_req) {
+		dev_err(dev->dev, "async_req is NULL !!\n");
+		return err;
+	}
+	if (backlog) {
+		backlog->complete(backlog, -EINPROGRESS);
+		backlog = NULL;
+	}
+
+	dev->ablk_req = ablkcipher_request_cast(async_req);
+
 	tasklet_schedule(&dev->queue_task);
 	return err;
 }
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 9b55585a20fa0..ebc46e007804f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -166,6 +166,7 @@ static int rk_ahash_digest(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct crypto_async_request *async_req, *backlog;
 	struct rk_crypto_info *dev = NULL;
 	unsigned long flags;
 	int ret;
@@ -202,8 +203,21 @@ static int rk_ahash_digest(struct ahash_request *req)
 
 	spin_lock_irqsave(&dev->lock, flags);
 	ret = crypto_enqueue_request(&dev->queue, &req->base);
+	backlog   = crypto_get_backlog(&dev->queue);
+	async_req = crypto_dequeue_request(&dev->queue);
 	spin_unlock_irqrestore(&dev->lock, flags);
 
+	if (!async_req) {
+		dev_err(dev->dev, "async_req is NULL !!\n");
+		return ret;
+	}
+	if (backlog) {
+		backlog->complete(backlog, -EINPROGRESS);
+		backlog = NULL;
+	}
+
+	dev->ahash_req = ahash_request_cast(async_req);
+
 	tasklet_schedule(&dev->queue_task);
 
 	/*

From a7c391f04fe3259fb0417d71fec78ae28f25780e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:03 +0100
Subject: [PATCH 073/116] crypto: algapi - use separate dst and src operands
 for __crypto_xor()

In preparation of introducing crypto_xor_cpy(), which will use separate
operands for input and output, modify the __crypto_xor() implementation,
which it will share with the existing crypto_xor(), which provides the
actual functionality when not using the inline version.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 25 +++++++++++++++----------
 include/crypto/algapi.h |  4 ++--
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index e4cc7615a1395..aa699ff6c8765 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -975,13 +975,15 @@ void crypto_inc(u8 *a, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 {
 	int relalign = 0;
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
 		int size = sizeof(unsigned long);
-		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+		int d = (((unsigned long)dst ^ (unsigned long)src1) |
+			 ((unsigned long)dst ^ (unsigned long)src2)) &
+			(size - 1);
 
 		relalign = d ? 1 << __ffs(d) : size;
 
@@ -992,34 +994,37 @@ void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 		 * process the remainder of the input using optimal strides.
 		 */
 		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
-			*dst++ ^= *src++;
+			*dst++ = *src1++ ^ *src2++;
 			len--;
 		}
 	}
 
 	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
-		*(u64 *)dst ^= *(u64 *)src;
+		*(u64 *)dst = *(u64 *)src1 ^  *(u64 *)src2;
 		dst += 8;
-		src += 8;
+		src1 += 8;
+		src2 += 8;
 		len -= 8;
 	}
 
 	while (len >= 4 && !(relalign & 3)) {
-		*(u32 *)dst ^= *(u32 *)src;
+		*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
 		dst += 4;
-		src += 4;
+		src1 += 4;
+		src2 += 4;
 		len -= 4;
 	}
 
 	while (len >= 2 && !(relalign & 1)) {
-		*(u16 *)dst ^= *(u16 *)src;
+		*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
 		dst += 2;
-		src += 2;
+		src1 += 2;
+		src2 += 2;
 		len -= 2;
 	}
 
 	while (len--)
-		*dst++ ^= *src++;
+		*dst++ = *src1++ ^ *src2++;
 }
 EXPORT_SYMBOL_GPL(__crypto_xor);
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 436c4c2683c7d..fd547f946bf8e 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -192,7 +192,7 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue)
 }
 
 void crypto_inc(u8 *a, unsigned int size);
-void __crypto_xor(u8 *dst, const u8 *src, unsigned int size);
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int size);
 
 static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 {
@@ -207,7 +207,7 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 			size -= sizeof(unsigned long);
 		}
 	} else {
-		__crypto_xor(dst, src, size);
+		__crypto_xor(dst, dst, src, size);
 	}
 }
 

From 45fe93dff2fb58b22de04c729f8447ba0f773d93 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:04 +0100
Subject: [PATCH 074/116] crypto: algapi - make crypto_xor() take separate dst
 and src arguments

There are quite a number of occurrences in the kernel of the pattern

  if (dst != src)
          memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
  crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);

or

  crypto_xor(keystream, src, nbytes);
  memcpy(dst, keystream, nbytes);

where crypto_xor() is preceded or followed by a memcpy() invocation
that is only there because crypto_xor() uses its output parameter as
one of the inputs. To avoid having to add new instances of this pattern
in the arm64 code, which will be refactored to implement non-SIMD
fallbacks, add an alternative implementation called crypto_xor_cpy(),
taking separate input and output arguments. This removes the need for
the separate memcpy().

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/aes-ce-glue.c       |  4 +---
 arch/arm/crypto/aes-neonbs-glue.c   |  5 ++---
 arch/arm64/crypto/aes-glue.c        |  4 +---
 arch/arm64/crypto/aes-neonbs-glue.c |  5 ++---
 arch/sparc/crypto/aes_glue.c        |  3 +--
 arch/x86/crypto/aesni-intel_glue.c  |  4 ++--
 arch/x86/crypto/blowfish_glue.c     |  3 +--
 arch/x86/crypto/cast5_avx_glue.c    |  3 +--
 arch/x86/crypto/des3_ede_glue.c     |  3 +--
 crypto/ctr.c                        |  3 +--
 crypto/pcbc.c                       | 12 ++++--------
 drivers/crypto/vmx/aes_ctr.c        |  3 +--
 drivers/md/dm-crypt.c               | 11 +++++------
 include/crypto/algapi.h             | 19 +++++++++++++++++++
 14 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 0f966a8ca1cef..d0a9cec73707b 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index c763779614442..18768f3304495 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index bcf596b0197ef..0da30e3b0e4be 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -241,9 +241,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
 				blocks, walk.iv, first);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index db2501d93550c..9001aec16007b 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -224,9 +224,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index c90930de76ba8..3cd4f6b198b65 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
 
 	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
 			      keystream, AES_BLOCK_SIZE);
-	crypto_xor((u8 *) keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4a55cdcdc0082..5c15d6b573299 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	aesni_enc(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
+
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 17c05531dfd17..f9eca34301e20 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	unsigned int nbytes = walk->nbytes;
 
 	blowfish_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 8648158f39166..dbea6020ffe7d 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	unsigned int nbytes = walk->nbytes;
 
 	__cast5_encrypt(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index d6fc59aaaadfb..30c0a37f48826 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	des3_ede_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
diff --git a/crypto/ctr.c b/crypto/ctr.c
index 477d9226ccaac..854d924f9d8e6 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -65,8 +65,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
 	unsigned int nbytes = walk->nbytes;
 
 	crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, bsize);
 }
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index 29dd2b4a3b85b..d9e45a9587201 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -55,8 +55,7 @@ static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
 	do {
 		crypto_xor(iv, src, bsize);
 		crypto_cipher_encrypt_one(tfm, dst, iv);
-		memcpy(iv, dst, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, dst, src, bsize);
 
 		src += bsize;
 		dst += bsize;
@@ -79,8 +78,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
 		memcpy(tmpbuf, src, bsize);
 		crypto_xor(iv, src, bsize);
 		crypto_cipher_encrypt_one(tfm, src, iv);
-		memcpy(iv, tmpbuf, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, tmpbuf, src, bsize);
 
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);
@@ -127,8 +125,7 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
 	do {
 		crypto_cipher_decrypt_one(tfm, dst, src);
 		crypto_xor(dst, iv, bsize);
-		memcpy(iv, src, bsize);
-		crypto_xor(iv, dst, bsize);
+		crypto_xor_cpy(iv, dst, src, bsize);
 
 		src += bsize;
 		dst += bsize;
@@ -153,8 +150,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
 		memcpy(tmpbuf, src, bsize);
 		crypto_cipher_decrypt_one(tfm, src, src);
 		crypto_xor(src, iv, bsize);
-		memcpy(iv, tmpbuf, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, src, tmpbuf, bsize);
 
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 9c26d9e8dbea7..17d84217dd765 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -104,8 +104,7 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
 	pagefault_enable();
 	preempt_enable();
 
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index cdf6b1e124604..fa17e54527968 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -758,9 +758,8 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
 	int i, r;
 
 	/* xor whitening with sector number */
-	memcpy(buf, tcw->whitening, TCW_WHITENING_SIZE);
-	crypto_xor(buf, (u8 *)&sector, 8);
-	crypto_xor(&buf[8], (u8 *)&sector, 8);
+	crypto_xor_cpy(buf, tcw->whitening, (u8 *)&sector, 8);
+	crypto_xor_cpy(&buf[8], tcw->whitening + 8, (u8 *)&sector, 8);
 
 	/* calculate crc32 for every 32bit part and xor it */
 	desc->tfm = tcw->crc32_tfm;
@@ -805,10 +804,10 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
 	}
 
 	/* Calculate IV */
-	memcpy(iv, tcw->iv_seed, cc->iv_size);
-	crypto_xor(iv, (u8 *)&sector, 8);
+	crypto_xor_cpy(iv, tcw->iv_seed, (u8 *)&sector, 8);
 	if (cc->iv_size > 8)
-		crypto_xor(&iv[8], (u8 *)&sector, cc->iv_size - 8);
+		crypto_xor_cpy(&iv[8], tcw->iv_seed + 8, (u8 *)&sector,
+			       cc->iv_size - 8);
 
 	return r;
 }
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index fd547f946bf8e..e3cebf640c000 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -211,6 +211,25 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 	}
 }
 
+static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
+				  unsigned int size)
+{
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    __builtin_constant_p(size) &&
+	    (size % sizeof(unsigned long)) == 0) {
+		unsigned long *d = (unsigned long *)dst;
+		unsigned long *s1 = (unsigned long *)src1;
+		unsigned long *s2 = (unsigned long *)src2;
+
+		while (size > 0) {
+			*d++ = *s1++ ^ *s2++;
+			size -= sizeof(unsigned long);
+		}
+	} else {
+		__crypto_xor(dst, src1, src2, size);
+	}
+}
+
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err);
 int blkcipher_walk_virt(struct blkcipher_desc *desc,

From 6d6254d728a2e696aa697b4b44cb7736851f62e3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:05 +0100
Subject: [PATCH 075/116] crypto: arm64/ghash-ce - add non-SIMD scalar fallback

The arm64 kernel will shortly disallow nested kernel mode NEON, so
add a fallback to scalar C code that can be invoked in that case.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig         |  3 +-
 arch/arm64/crypto/ghash-ce-glue.c | 49 ++++++++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index d92293747d631..7d75a363e3172 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -28,8 +28,9 @@ config CRYPTO_SHA2_ARM64_CE
 
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_GF128MUL
 
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 833ec1e3f3e9b..30221ef56e70d 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -9,7 +9,9 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
+#include <crypto/gf128mul.h>
 #include <crypto/internal/hash.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -25,6 +27,7 @@ MODULE_LICENSE("GPL v2");
 struct ghash_key {
 	u64 a;
 	u64 b;
+	be128 k;
 };
 
 struct ghash_desc_ctx {
@@ -44,6 +47,36 @@ static int ghash_init(struct shash_desc *desc)
 	return 0;
 }
 
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+			    struct ghash_key *key, const char *head)
+{
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, dg, src, key, head);
+		kernel_neon_end();
+	} else {
+		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+		do {
+			const u8 *in = src;
+
+			if (head) {
+				in = head;
+				blocks++;
+				head = NULL;
+			} else {
+				src += GHASH_BLOCK_SIZE;
+			}
+
+			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+			gf128mul_lle(&dst, &key->k);
+		} while (--blocks);
+
+		dg[0] = be64_to_cpu(dst.b);
+		dg[1] = be64_to_cpu(dst.a);
+	}
+}
+
 static int ghash_update(struct shash_desc *desc, const u8 *src,
 			unsigned int len)
 {
@@ -67,10 +100,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 		blocks = len / GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(blocks, ctx->digest, src, key,
-				   partial ? ctx->buf : NULL);
-		kernel_neon_end();
+		ghash_do_update(blocks, ctx->digest, src, key,
+				partial ? ctx->buf : NULL);
+
 		src += blocks * GHASH_BLOCK_SIZE;
 		partial = 0;
 	}
@@ -89,9 +121,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
-		kernel_neon_end();
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -111,6 +141,9 @@ static int ghash_setkey(struct crypto_shash *tfm,
 		return -EINVAL;
 	}
 
+	/* needed for the fallback */
+	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
+
 	/* perform multiplication by 'x' in GF(2^128) */
 	b = get_unaligned_be64(inkey);
 	a = get_unaligned_be64(inkey + 8);

From 2dde374e1ff0663ca46b343949c242959028f976 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:06 +0100
Subject: [PATCH 076/116] crypto: arm64/crct10dif - add non-SIMD generic
 fallback

The arm64 kernel will shortly disallow nested kernel mode NEON, so
add a fallback to scalar C code that can be invoked in that case.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/crct10dif-ce-glue.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 60cb590c2590b..96f0cae4a0225 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +18,7 @@
 #include <crypto/internal/hash.h>
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 
@@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
 	}
 
 	if (length > 0) {
-		kernel_neon_begin_partial(14);
-		*crc = crc_t10dif_pmull(*crc, data, length);
-		kernel_neon_end();
+		if (may_use_simd()) {
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull(*crc, data, length);
+			kernel_neon_end();
+		} else {
+			*crc = crc_t10dif_generic(*crc, data, length);
+		}
 	}
 
 	return 0;

From 15c7d8f8a2c7c86cb36f4d0273a1b2b2c9a479d7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:07 +0100
Subject: [PATCH 077/116] crypto: arm64/crc32 - add non-SIMD scalar fallback

The arm64 kernel will shortly disallow nested kernel mode NEON, so
add a fallback to scalar C code that can be invoked in that case.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/crc32-ce-glue.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
index eccb1ae900641..624f4137918ce 100644
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 
 #define PMULL_MIN_LEN		64L	/* minimum size of buffer
@@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 	}
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32_pmull_le(data, l, *crc);
 		kernel_neon_end();
 
@@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 	}
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32c_pmull_le(data, l, *crc);
 		kernel_neon_end();
 

From 0771f3234db67732a49777a5fceaed6f7b4b488d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:08 +0100
Subject: [PATCH 078/116] crypto: arm64/sha1-ce - add non-SIMD generic fallback

The arm64 kernel will shortly disallow nested kernel mode NEON, so
add a fallback to scalar C code that can be invoked in that case.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig        |  3 ++-
 arch/arm64/crypto/sha1-ce-glue.c | 18 ++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 7d75a363e3172..5d5953545dad5 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -18,8 +18,9 @@ config CRYPTO_SHA512_ARM64
 
 config CRYPTO_SHA1_ARM64_CE
 	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA1
 
 config CRYPTO_SHA2_ARM64_CE
 	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index ea319c055f5df..efbeb3e0dcfb0 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return crypto_sha1_update(desc, data, len);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
@@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, data, len, out);
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 	if (!finalize)
@@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, NULL, 0, out);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
 	return sha1_base_finish(desc, out);

From da1793312f7693787e0ed22aa121261c3e0e15c0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:09 +0100
Subject: [PATCH 079/116] crypto: arm64/sha2-ce - add non-SIMD scalar fallback

The arm64 kernel will shortly disallow nested kernel mode NEON, so
add a fallback to scalar code that can be invoked in that case.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig        |  3 ++-
 arch/arm64/crypto/sha2-ce-glue.c | 30 ++++++++++++++++++++++++++----
 arch/arm64/crypto/sha256-glue.c  |  1 +
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 5d5953545dad5..8cd145f9c1ff3 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -24,8 +24,9 @@ config CRYPTO_SHA1_ARM64_CE
 
 config CRYPTO_SHA2_ARM64_CE
 	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA256_ARM64
 
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 0ed9486f75dd9..fd1ff2b13dfa3 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
 						 finalize);
 
+asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
@@ -54,13 +61,22 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
+	if (!may_use_simd()) {
+		if (len)
+			sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	if (!finalize)
@@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd()) {
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
 	return sha256_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index a2226f8419609..b064d925fe2a7 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256");
 
 asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
+EXPORT_SYMBOL(sha256_block_data_order);
 
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
 				  unsigned int num_blks);

From f402e3115e20b345bd6fbfcf463a506d958c7bf6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:10 +0100
Subject: [PATCH 080/116] crypto: arm64/aes-ce-cipher - match round key
 endianness with generic code

In order to be able to reuse the generic AES code as a fallback for
situations where the NEON may not be used, update the key handling
to match the byte order of the generic code: it stores round keys
as sequences of 32-bit quantities rather than streams of bytes, and
so our code needs to be updated to reflect that.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-ce-ccm-core.S | 30 ++++++++++++-------------
 arch/arm64/crypto/aes-ce-cipher.c   | 35 +++++++++++++----------------
 arch/arm64/crypto/aes-ce.S          | 12 +++++-----
 3 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 3363560c79b7e..e3a375c4cb83c 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.16b}, [x4]			/* load first round key */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
+4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
+5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.16b}, [x6], #16		/* load next round key */
+	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
+1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
+2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
+3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.16b}, [x3]			/* load first round key */
+	ld1	{v3.4s}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
+3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
+4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.16b}, [x10], #16		/* load next round key */
+	ld1	{v5.4s}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index 50d9fe11d0c86..a0a0e5e3a8b57 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -1,7 +1,7 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -47,24 +48,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -92,24 +93,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -165,20 +166,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
 	kernel_neon_begin_partial(2);
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-			 rki[0];
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -210,9 +207,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.16b}, %[in]		;"
+		__asm__("ld1	{v0.4s}, %[in]		;"
 			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.16b}, %[out]	;"
+			"st1	{v1.4s}, %[out]	;"
 
 		:	[out]	"=Q"(key_dec[i])
 		:	[in]	"Q"(key_enc[j])
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index b46093d567e54..50330f5c3adc4 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  *                                    Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -22,11 +22,11 @@
 	cmp		\rounds, #12
 	blo		2222f		/* 128 bits */
 	beq		1111f		/* 192 bits */
-	ld1		{v17.16b-v18.16b}, [\rk], #32
-1111:	ld1		{v19.16b-v20.16b}, [\rk], #32
-2222:	ld1		{v21.16b-v24.16b}, [\rk], #64
-	ld1		{v25.16b-v28.16b}, [\rk], #64
-	ld1		{v29.16b-v31.16b}, [\rk]
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
 	.endm
 
 	/* prepare for encryption with key in rk[] */

From b8fb993a836cd432309410eadf083bbe9c0e9e9c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:11 +0100
Subject: [PATCH 081/116] crypto: arm64/aes-ce-cipher: add non-SIMD generic
 fallback

The arm64 kernel will shortly disallow nested kernel mode NEON, so
add a fallback to scalar code that can be invoked in that case.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig         |  1 +
 arch/arm64/crypto/aes-ce-cipher.c | 20 +++++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 8cd145f9c1ff3..2fd4bb6d0b5a8 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -52,6 +52,7 @@ config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM64
 
 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index a0a0e5e3a8b57..6a75cd75ed11c 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
@@ -21,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
@@ -45,7 +49,12 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;
 
-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
 		"	ld1	{v1.4s}, [%[key]], #16		;"
@@ -90,7 +99,12 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;
 
-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
 		"	ld1	{v1.4s}, [%[key]], #16		;"
@@ -170,7 +184,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	for (i = 0; i < kwords; i++)
 		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
-	kernel_neon_begin_partial(2);
+	kernel_neon_begin();
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;

From 5092fcf3490811a735ef44bd22d8b5ff1bd63926 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:12 +0100
Subject: [PATCH 082/116] crypto: arm64/aes-ce-ccm: add non-SIMD generic
 fallback

The arm64 kernel will shortly disallow nested kernel mode NEON.

So honour this in the ARMv8 Crypto Extensions implementation of
CCM-AES, and fall back to a scalar implementation using the generic
crypto helpers for AES, XOR and incrementing the CTR counter.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig           |   1 +
 arch/arm64/crypto/aes-ce-ccm-glue.c | 174 ++++++++++++++++++++++------
 2 files changed, 140 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2fd4bb6d0b5a8..ba637765c19a4 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -59,6 +59,7 @@ config CRYPTO_AES_ARM64_CE_CCM
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AEAD
 
 config CRYPTO_AES_ARM64_CE_BLK
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 6a7dbc7c83a61..a1254036f2b1e 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -1,7 +1,7 @@
 /*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
@@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 				 u32 rounds);
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
@@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 	return 0;
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+			   u32 abytes, u32 *macp, bool use_neon)
+{
+	if (likely(use_neon)) {
+		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+				     num_rounds(key));
+	} else {
+		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+			int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+			crypto_xor(&mac[*macp], in, added);
+
+			*macp += added;
+			in += added;
+			abytes -= added;
+		}
+
+		while (abytes > AES_BLOCK_SIZE) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+			in += AES_BLOCK_SIZE;
+			abytes -= AES_BLOCK_SIZE;
+		}
+
+		if (abytes > 0) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, abytes);
+			*macp = abytes;
+		} else {
+			*macp = 0;
+		}
+	}
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+				   bool use_neon)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 		ltag.len = 6;
 	}
 
-	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-			     num_rounds(ctx));
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
 	scatterwalk_start(&walk, req->src);
 
 	do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-				     num_rounds(ctx));
+		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
 		len -= n;
 
 		scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 	} while (len);
 }
 
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+			      struct crypto_aes_ctx *ctx, bool enc)
+{
+	u8 buf[AES_BLOCK_SIZE];
+	int err = 0;
+
+	while (walk->nbytes) {
+		int blocks = walk->nbytes / AES_BLOCK_SIZE;
+		u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+		u8 *dst = walk->dst.virt.addr;
+		u8 *src = walk->src.virt.addr;
+		u32 nbytes = walk->nbytes;
+
+		if (nbytes == walk->total && tail > 0) {
+			blocks++;
+			tail = 0;
+		}
+
+		do {
+			u32 bsize = AES_BLOCK_SIZE;
+
+			if (nbytes < AES_BLOCK_SIZE)
+				bsize = nbytes;
+
+			crypto_inc(walk->iv, AES_BLOCK_SIZE);
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+					    num_rounds(ctx));
+			__aes_arm64_encrypt(ctx->key_enc, mac, mac,
+					    num_rounds(ctx));
+			if (enc)
+				crypto_xor(mac, src, bsize);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			if (!enc)
+				crypto_xor(mac, dst, bsize);
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		} while (--blocks);
+
+		err = skcipher_walk_done(walk, tail);
+	}
+
+	if (!err) {
+		__aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+		__aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+		crypto_xor(mac, buf, AES_BLOCK_SIZE);
+	}
+	return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
+	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-		if (walk.nbytes == walk.total)
-			tail = 0;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			ce_aes_ccm_encrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
-	kernel_neon_end();
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+	}
 	if (err)
 		return err;
 
@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
+	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == walk.total)
-			tail = 0;
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
-		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			ce_aes_ccm_decrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
-	kernel_neon_end();
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+	}
 
 	if (err)
 		return err;

From e211506979e205e5a00b0a9d321fb3cbb44ee9ea Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:13 +0100
Subject: [PATCH 083/116] crypto: arm64/aes-blk - add a non-SIMD fallback for
 synchronous CTR

To accommodate systems that may disallow use of the NEON in kernel mode
in some circumstances, introduce a C fallback for synchronous AES in CTR
mode, and use it if may_use_simd() returns false.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig            |  6 ++-
 arch/arm64/crypto/aes-ctr-fallback.h | 53 +++++++++++++++++++++++++
 arch/arm64/crypto/aes-glue.c         | 59 +++++++++++++++++++++-------
 3 files changed, 101 insertions(+), 17 deletions(-)
 create mode 100644 arch/arm64/crypto/aes-ctr-fallback.h

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index ba637765c19a4..a068dcbe25187 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -64,15 +64,17 @@ config CRYPTO_AES_ARM64_CE_CCM
 
 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AES
 	select CRYPTO_SIMD
 
diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h
new file mode 100644
index 0000000000000..c9285717b6b5e
--- /dev/null
+++ b/arch/arm64/crypto/aes-ctr-fallback.h
@@ -0,0 +1,53 @@
+/*
+ * Fallback for sync aes(ctr) in contexts where kernel mode NEON
+ * is not allowed
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
+					   struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+			tail = walk.nbytes % AES_BLOCK_SIZE;
+		}
+
+		do {
+			int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
+					    6 + ctx->key_length / 4);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+			dst += AES_BLOCK_SIZE;
+			src += AES_BLOCK_SIZE;
+			nbytes -= AES_BLOCK_SIZE;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 0da30e3b0e4be..998ba519a0266 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -10,6 +10,7 @@
 
 #include <asm/neon.h>
 #include <asm/hwcap.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
@@ -19,6 +20,7 @@
 #include <crypto/xts.h>
 
 #include "aes-ce-setkey.h"
+#include "aes-ctr-fallback.h"
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
@@ -249,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(ctx, req);
+
+	return ctr_encrypt(req);
+}
+
 static int xts_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -355,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { {
 	.ivsize		= AES_BLOCK_SIZE,
 	.chunksize	= AES_BLOCK_SIZE,
 	.setkey		= skcipher_aes_setkey,
-	.encrypt	= ctr_encrypt,
-	.decrypt	= ctr_encrypt,
+	.encrypt	= ctr_encrypt_sync,
+	.decrypt	= ctr_encrypt_sync,
 }, {
 	.base = {
 		.cra_name		= "__xts(aes)",
@@ -458,11 +471,35 @@ static int mac_init(struct shash_desc *desc)
 	return 0;
 }
 
+static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
+			  u8 dg[], int enc_before, int enc_after)
+{
+	int rounds = 6 + ctx->key_length / 4;
+
+	if (may_use_simd()) {
+		kernel_neon_begin();
+		aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
+			       enc_after);
+		kernel_neon_end();
+	} else {
+		if (enc_before)
+			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+
+		while (blocks--) {
+			crypto_xor(dg, in, AES_BLOCK_SIZE);
+			in += AES_BLOCK_SIZE;
+
+			if (blocks || enc_after)
+				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
+						    rounds);
+		}
+	}
+}
+
 static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
 	while (len > 0) {
 		unsigned int l;
@@ -474,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 
 			len %= AES_BLOCK_SIZE;
 
-			kernel_neon_begin();
-			aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
-				       ctx->dg, (ctx->len != 0), (len != 0));
-			kernel_neon_end();
+			mac_do_update(&tctx->key, p, blocks, ctx->dg,
+				      (ctx->len != 0), (len != 0));
 
 			p += blocks * AES_BLOCK_SIZE;
 
@@ -505,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
-	kernel_neon_begin();
-	aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
@@ -520,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 	u8 *consts = tctx->consts;
 
 	if (ctx->len != AES_BLOCK_SIZE) {
@@ -528,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 		consts += AES_BLOCK_SIZE;
 	}
 
-	kernel_neon_begin();
-	aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 

From 611d5324f488b82ef224ab493f1ed5fb2af6f003 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:14 +0100
Subject: [PATCH 084/116] crypto: arm64/chacha20 - take may_use_simd() into
 account

To accommodate systems that disallow the use of kernel mode NEON in
some circumstances, take the return value of may_use_simd into
account when deciding whether to invoke the C fallback routine.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/chacha20-neon-glue.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
index a7cd575ea2233..cbdb75d15cd03 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -1,7 +1,7 @@
 /*
  * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
  *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
@@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req)
 	u32 state[16];
 	int err;
 
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);
 
 	err = skcipher_walk_virt(&walk, req, true);

From ec808bbef0b15ad103771222d419245617378f32 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:15 +0100
Subject: [PATCH 085/116] crypto: arm64/aes-bs - implement non-SIMD fallback
 for AES-CTR

Of the various chaining modes implemented by the bit sliced AES driver,
only CTR is exposed as a synchronous cipher, and requires a fallback in
order to remain usable once we update the kernel mode NEON handling logic
to disallow nested use. So wire up the existing CTR fallback C code.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig           |  1 +
 arch/arm64/crypto/aes-neonbs-glue.c | 48 ++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index a068dcbe25187..f9e264b833664 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -89,6 +89,7 @@ config CRYPTO_AES_ARM64_BS
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
 endif
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 9001aec16007b..c55d68ccb89f8 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -1,7 +1,7 @@
 /*
  * Bit sliced AES using NEON instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,12 +9,15 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 
+#include "aes-ctr-fallback.h"
+
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
@@ -58,6 +61,11 @@ struct aesbs_cbc_ctx {
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 };
 
+struct aesbs_ctr_ctx {
+	struct aesbs_ctx	key;		/* must be first member */
+	struct crypto_aes_ctx	fallback;
+};
+
 struct aesbs_xts_ctx {
 	struct aesbs_ctx	key;
 	u32			twkey[AES_MAX_KEYLENGTH_U32];
@@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+				 unsigned int key_len)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->key.rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -259,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 	return aesbs_setkey(tfm, in_key, key_len);
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+
+	return ctr_encrypt(req);
+}
+
 static int __xts_crypt(struct skcipher_request *req,
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]))
@@ -355,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_priority	= 250 - 1,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
@@ -363,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { {
 	.chunksize		= AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_setkey,
-	.encrypt		= ctr_encrypt,
-	.decrypt		= ctr_encrypt,
+	.setkey			= aesbs_ctr_setkey_sync,
+	.encrypt		= ctr_encrypt_sync,
+	.decrypt		= ctr_encrypt_sync,
 }, {
 	.base.cra_name		= "__xts(aes)",
 	.base.cra_driver_name	= "__xts-aes-neonbs",

From 537c1445ab0b1e33ca338b669e347652c45f4e8c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:16 +0100
Subject: [PATCH 086/116] crypto: arm64/gcm - implement native driver using v8
 Crypto Extensions

Currently, the AES-GCM implementation for arm64 systems that support the
ARMv8 Crypto Extensions is based on the generic GCM module, which combines
the AES-CTR implementation using AES instructions with the PMULL based
GHASH driver. This is suboptimal, given the fact that the input data needs
to be loaded twice, once for the encryption and again for the MAC
calculation.

On Cortex-A57 (r1p2) and other recent cores that implement micro-op fusing
for the AES instructions, AES executes at less than 1 cycle per byte, which
means that any cycles wasted on loading the data twice hurt even more.

So implement a new GCM driver that combines the AES and PMULL instructions
at the block level. This improves performance on Cortex-A57 by ~37% (from
3.5 cpb to 2.6 cpb)

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig         |   4 +-
 arch/arm64/crypto/ghash-ce-core.S | 175 ++++++++++++
 arch/arm64/crypto/ghash-ce-glue.c | 438 ++++++++++++++++++++++++++++--
 3 files changed, 591 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index f9e264b833664..7ca54a76f6b9f 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -29,10 +29,12 @@ config CRYPTO_SHA2_ARM64_CE
 	select CRYPTO_SHA256_ARM64
 
 config CRYPTO_GHASH_ARM64_CE
-	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_GF128MUL
+	select CRYPTO_AES
+	select CRYPTO_AES_ARM64
 
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index f0bb9f0b524fc..cb22459eba855 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -77,3 +77,178 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	st1		{XL.2d}, [x1]
 	ret
 ENDPROC(pmull_ghash_update)
+
+	KS		.req	v8
+	CTR		.req	v9
+	INP		.req	v10
+
+	.macro		load_round_keys, rounds, rk
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
+	.endm
+
+	.macro		enc_round, state, key
+	aese		\state\().16b, \key\().16b
+	aesmc		\state\().16b, \state\().16b
+	.endm
+
+	.macro		enc_block, state, rounds
+	cmp		\rounds, #12
+	b.lo		2222f		/* 128 bits */
+	b.eq		1111f		/* 192 bits */
+	enc_round	\state, v17
+	enc_round	\state, v18
+1111:	enc_round	\state, v19
+	enc_round	\state, v20
+2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	enc_round	\state, \key
+	.endr
+	aese		\state\().16b, v30.16b
+	eor		\state\().16b, \state\().16b, v31.16b
+	.endm
+
+	.macro		pmull_gcm_do_crypt, enc
+	ld1		{SHASH.2d}, [x4]
+	ld1		{XL.2d}, [x1]
+	ldr		x8, [x5, #8]			// load lower counter
+
+	movi		MASK.16b, #0xe1
+	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(	rev		x8, x8		)
+	shl		MASK.2d, MASK.2d, #57
+	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
+
+	.if		\enc == 1
+	ld1		{KS.16b}, [x7]
+	.endif
+
+0:	ld1		{CTR.8b}, [x5]			// load upper counter
+	ld1		{INP.16b}, [x3], #16
+	rev		x9, x8
+	add		x8, x8, #1
+	sub		w0, w0, #1
+	ins		CTR.d[1], x9			// set lower counter
+
+	.if		\enc == 1
+	eor		INP.16b, INP.16b, KS.16b	// encrypt input
+	st1		{INP.16b}, [x2], #16
+	.endif
+
+	rev64		T1.16b, INP.16b
+
+	cmp		w6, #12
+	b.ge		2f				// AES-192/256?
+
+1:	enc_round	CTR, v21
+
+	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		IN1.16b, T1.16b, T1.16b, #8
+
+	enc_round	CTR, v22
+
+	eor		T1.16b, T1.16b, T2.16b
+	eor		XL.16b, XL.16b, IN1.16b
+
+	enc_round	CTR, v23
+
+	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
+	eor		T1.16b, T1.16b, XL.16b
+
+	enc_round	CTR, v24
+
+	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
+	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
+
+	enc_round	CTR, v25
+
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, XL.16b, XH.16b
+	eor		XM.16b, XM.16b, T1.16b
+
+	enc_round	CTR, v26
+
+	eor		XM.16b, XM.16b, T2.16b
+	pmull		T2.1q, XL.1d, MASK.1d
+
+	enc_round	CTR, v27
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	enc_round	CTR, v28
+
+	eor		XL.16b, XM.16b, T2.16b
+
+	enc_round	CTR, v29
+
+	ext		T2.16b, XL.16b, XL.16b, #8
+
+	aese		CTR.16b, v30.16b
+
+	pmull		XL.1q, XL.1d, MASK.1d
+	eor		T2.16b, T2.16b, XH.16b
+
+	eor		KS.16b, CTR.16b, v31.16b
+
+	eor		XL.16b, XL.16b, T2.16b
+
+	.if		\enc == 0
+	eor		INP.16b, INP.16b, KS.16b
+	st1		{INP.16b}, [x2], #16
+	.endif
+
+	cbnz		w0, 0b
+
+CPU_LE(	rev		x8, x8		)
+	st1		{XL.2d}, [x1]
+	str		x8, [x5, #8]			// store lower counter
+
+	.if		\enc == 1
+	st1		{KS.16b}, [x7]
+	.endif
+
+	ret
+
+2:	b.eq		3f				// AES-192?
+	enc_round	CTR, v17
+	enc_round	CTR, v18
+3:	enc_round	CTR, v19
+	enc_round	CTR, v20
+	b		1b
+	.endm
+
+	/*
+	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds, u8 ks[])
+	 */
+ENTRY(pmull_gcm_encrypt)
+	pmull_gcm_do_crypt	1
+ENDPROC(pmull_gcm_encrypt)
+
+	/*
+	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds)
+	 */
+ENTRY(pmull_gcm_decrypt)
+	pmull_gcm_do_crypt	0
+ENDPROC(pmull_gcm_decrypt)
+
+	/*
+	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
+	 */
+ENTRY(pmull_gcm_encrypt_block)
+	cbz		x2, 0f
+	load_round_keys	w3, x2
+0:	ld1		{v0.16b}, [x1]
+	enc_block	v0, w3
+	st1		{v0.16b}, [x0]
+	ret
+ENDPROC(pmull_gcm_encrypt_block)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 30221ef56e70d..ee6aaac05905d 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -11,18 +11,25 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 #include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
+#define GCM_IV_SIZE		12
 
 struct ghash_key {
 	u64 a;
@@ -36,9 +43,27 @@ struct ghash_desc_ctx {
 	u32 count;
 };
 
+struct gcm_aes_ctx {
+	struct crypto_aes_ctx	aes_key;
+	struct ghash_key	ghash_key;
+};
+
 asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 				   struct ghash_key const *k, const char *head);
 
+asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds, u8 ks[]);
+
+asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds);
+
+asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
+					u32 const rk[], int rounds);
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ghash_init(struct shash_desc *desc)
 {
 	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -130,17 +155,11 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	return 0;
 }
 
-static int ghash_setkey(struct crypto_shash *tfm,
-			const u8 *inkey, unsigned int keylen)
+static int __ghash_setkey(struct ghash_key *key,
+			  const u8 *inkey, unsigned int keylen)
 {
-	struct ghash_key *key = crypto_shash_ctx(tfm);
 	u64 a, b;
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
 	/* needed for the fallback */
 	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
 
@@ -157,32 +176,401 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	return 0;
 }
 
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return __ghash_setkey(key, inkey, keylen);
+}
+
 static struct shash_alg ghash_alg = {
-	.digestsize	= GHASH_DIGEST_SIZE,
-	.init		= ghash_init,
-	.update		= ghash_update,
-	.final		= ghash_final,
-	.setkey		= ghash_setkey,
-	.descsize	= sizeof(struct ghash_desc_ctx),
-	.base		= {
-		.cra_name		= "ghash",
-		.cra_driver_name	= "ghash-ce",
-		.cra_priority		= 200,
-		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize		= GHASH_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct ghash_key),
-		.cra_module		= THIS_MODULE,
-	},
+	.base.cra_name		= "ghash",
+	.base.cra_driver_name	= "ghash-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= GHASH_DIGEST_SIZE,
+	.init			= ghash_init,
+	.update			= ghash_update,
+	.final			= ghash_final,
+	.setkey			= ghash_setkey,
+	.descsize		= sizeof(struct ghash_desc_ctx),
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
+		      unsigned int keylen)
+{
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	u8 key[GHASH_BLOCK_SIZE];
+	int ret;
+
+	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+	if (ret) {
+		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
+			    num_rounds(&ctx->aes_key));
+
+	return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
+}
+
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12 ... 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+			   int *buf_count, struct gcm_aes_ctx *ctx)
+{
+	if (*buf_count > 0) {
+		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+		memcpy(&buf[*buf_count], src, buf_added);
+
+		*buf_count += buf_added;
+		src += buf_added;
+		count -= buf_added;
+	}
+
+	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+		int blocks = count / GHASH_BLOCK_SIZE;
+
+		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
+				*buf_count ? buf : NULL);
+
+		src += blocks * GHASH_BLOCK_SIZE;
+		count %= GHASH_BLOCK_SIZE;
+		*buf_count = 0;
+	}
+
+	if (count > 0) {
+		memcpy(buf, src, count);
+		*buf_count = count;
+	}
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	u8 buf[GHASH_BLOCK_SIZE];
+	struct scatter_walk walk;
+	u32 len = req->assoclen;
+	int buf_count = 0;
+
+	scatterwalk_start(&walk, req->src);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, len);
+		u8 *p;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		p = scatterwalk_map(&walk);
+
+		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+		len -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	} while (len);
+
+	if (buf_count) {
+		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+	}
+}
+
+static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
+		      u64 dg[], u8 tag[], int cryptlen)
+{
+	u8 mac[AES_BLOCK_SIZE];
+	u128 lengths;
+
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64(cryptlen * 8);
+
+	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+
+	put_unaligned_be64(dg[1], mac);
+	put_unaligned_be64(dg[0], mac + 8);
+
+	crypto_xor(tag, mac, AES_BLOCK_SIZE);
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 ks[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		pmull_gcm_encrypt_block(ks, iv, NULL,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key), ks);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    ks, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+					walk.dst.virt.addr, &ctx->ghash_key,
+					NULL);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		u8 buf[GHASH_BLOCK_SIZE];
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
+			       walk.nbytes);
+
+		memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen);
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key));
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			pmull_gcm_encrypt_block(iv, iv, NULL,
+						num_rounds(&ctx->aes_key));
+
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			ghash_do_update(blocks, dg, walk.src.virt.addr,
+					&ctx->ghash_key, NULL);
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    buf, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		memcpy(buf, walk.src.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
+			       walk.nbytes);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(buf, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(tag, buf, authsize))
+		return -EBADMSG;
+	return 0;
+}
+
+static struct aead_alg gcm_aes_alg = {
+	.ivsize			= GCM_IV_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.maxauthsize		= AES_BLOCK_SIZE,
+	.setkey			= gcm_setkey,
+	.setauthsize		= gcm_setauthsize,
+	.encrypt		= gcm_encrypt,
+	.decrypt		= gcm_decrypt,
+
+	.base.cra_name		= "gcm(aes)",
+	.base.cra_driver_name	= "gcm-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
 };
 
 static int __init ghash_ce_mod_init(void)
 {
-	return crypto_register_shash(&ghash_alg);
+	int ret;
+
+	ret = crypto_register_aead(&gcm_aes_alg);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_shash(&ghash_alg);
+	if (ret)
+		crypto_unregister_aead(&gcm_aes_alg);
+	return ret;
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
 	crypto_unregister_shash(&ghash_alg);
+	crypto_unregister_aead(&gcm_aes_alg);
 }
 
 module_cpu_feature_match(PMULL, ghash_ce_mod_init);

From 3759ee057261a45da0505e79084de8b6ac31c4a5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:17 +0100
Subject: [PATCH 087/116] crypto: arm/ghash - add NEON accelerated fallback for
 vmull.p64

Implement a NEON fallback for systems that do support NEON but have
no support for the optional 64x64->128 polynomial multiplication
instruction that is part of the ARMv8 Crypto Extensions. It is based
on the paper "Fast Software Polynomial Multiplication on ARM Processors
Using the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
Ricardo Dahab (https://hal.inria.fr/hal-01506572)

On a 32-bit guest executing under KVM on a Cortex-A57, the new code is
not only 4x faster than the generic table based GHASH driver, it is also
time invariant. (Note that the existing vmull.p64 code is 16x faster on
this core).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/Kconfig         |   5 +-
 arch/arm/crypto/ghash-ce-core.S | 234 ++++++++++++++++++++++++++------
 arch/arm/crypto/ghash-ce-glue.c |  24 +++-
 3 files changed, 215 insertions(+), 48 deletions(-)

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b9adedcc5b2e2..ec72752d5668e 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE
 	  ARMv8 Crypto Extensions
 
 config CRYPTO_GHASH_ARM_CE
-	tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+	tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
 	help
 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
-	  that is part of the ARMv8 Crypto Extensions
+	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
+	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
 config CRYPTO_CRCT10DIF_ARM_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index f6ab8bcc9efe7..2f78c10b18815 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -12,40 +12,162 @@
 #include <asm/assembler.h>
 
 	SHASH		.req	q0
-	SHASH2		.req	q1
-	T1		.req	q2
-	T2		.req	q3
-	MASK		.req	q4
-	XL		.req	q5
-	XM		.req	q6
-	XH		.req	q7
-	IN1		.req	q7
+	T1		.req	q1
+	XL		.req	q2
+	XM		.req	q3
+	XH		.req	q4
+	IN1		.req	q4
 
 	SHASH_L		.req	d0
 	SHASH_H		.req	d1
-	SHASH2_L	.req	d2
-	T1_L		.req	d4
-	MASK_L		.req	d8
-	XL_L		.req	d10
-	XL_H		.req	d11
-	XM_L		.req	d12
-	XM_H		.req	d13
-	XH_L		.req	d14
+	T1_L		.req	d2
+	T1_H		.req	d3
+	XL_L		.req	d4
+	XL_H		.req	d5
+	XM_L		.req	d6
+	XM_H		.req	d7
+	XH_L		.req	d8
+
+	t0l		.req	d10
+	t0h		.req	d11
+	t1l		.req	d12
+	t1h		.req	d13
+	t2l		.req	d14
+	t2h		.req	d15
+	t3l		.req	d16
+	t3h		.req	d17
+	t4l		.req	d18
+	t4h		.req	d19
+
+	t0q		.req	q5
+	t1q		.req	q6
+	t2q		.req	q7
+	t3q		.req	q8
+	t4q		.req	q9
+	T2		.req	q9
+
+	s1l		.req	d20
+	s1h		.req	d21
+	s2l		.req	d22
+	s2h		.req	d23
+	s3l		.req	d24
+	s3h		.req	d25
+	s4l		.req	d26
+	s4h		.req	d27
+
+	MASK		.req	d28
+	SHASH2_p8	.req	d28
+
+	k16		.req	d29
+	k32		.req	d30
+	k48		.req	d31
+	SHASH2_p64	.req	d31
 
 	.text
 	.fpu		crypto-neon-fp-armv8
 
+	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
+	vmull.p64	\rd, \rn, \rm
+	.endm
+
 	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
+	 * This implementation of 64x64 -> 128 bit polynomial multiplication
+	 * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+	 * "Fast Software Polynomial Multiplication on ARM Processors Using
+	 * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+	 * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
+	 *
+	 * It has been slightly tweaked for in-order performance, and to allow
+	 * 'rq' to overlap with 'ad' or 'bd'.
 	 */
-ENTRY(pmull_ghash_update)
-	vld1.64		{SHASH}, [r3]
+	.macro		__pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
+	vext.8		t0l, \ad, \ad, #1	@ A1
+	.ifc		\b1, t4l
+	vext.8		t4l, \bd, \bd, #1	@ B1
+	.endif
+	vmull.p8	t0q, t0l, \bd		@ F = A1*B
+	vext.8		t1l, \ad, \ad, #2	@ A2
+	vmull.p8	t4q, \ad, \b1		@ E = A*B1
+	.ifc		\b2, t3l
+	vext.8		t3l, \bd, \bd, #2	@ B2
+	.endif
+	vmull.p8	t1q, t1l, \bd		@ H = A2*B
+	vext.8		t2l, \ad, \ad, #3	@ A3
+	vmull.p8	t3q, \ad, \b2		@ G = A*B2
+	veor		t0q, t0q, t4q		@ L = E + F
+	.ifc		\b3, t4l
+	vext.8		t4l, \bd, \bd, #3	@ B3
+	.endif
+	vmull.p8	t2q, t2l, \bd		@ J = A3*B
+	veor		t0l, t0l, t0h		@ t0 = (L) (P0 + P1) << 8
+	veor		t1q, t1q, t3q		@ M = G + H
+	.ifc		\b4, t3l
+	vext.8		t3l, \bd, \bd, #4	@ B4
+	.endif
+	vmull.p8	t4q, \ad, \b3		@ I = A*B3
+	veor		t1l, t1l, t1h		@ t1 = (M) (P2 + P3) << 16
+	vmull.p8	t3q, \ad, \b4		@ K = A*B4
+	vand		t0h, t0h, k48
+	vand		t1h, t1h, k32
+	veor		t2q, t2q, t4q		@ N = I + J
+	veor		t0l, t0l, t0h
+	veor		t1l, t1l, t1h
+	veor		t2l, t2l, t2h		@ t2 = (N) (P4 + P5) << 24
+	vand		t2h, t2h, k16
+	veor		t3l, t3l, t3h		@ t3 = (K) (P6 + P7) << 32
+	vmov.i64	t3h, #0
+	vext.8		t0q, t0q, t0q, #15
+	veor		t2l, t2l, t2h
+	vext.8		t1q, t1q, t1q, #14
+	vmull.p8	\rq, \ad, \bd		@ D = A*B
+	vext.8		t2q, t2q, t2q, #13
+	vext.8		t3q, t3q, t3q, #12
+	veor		t0q, t0q, t1q
+	veor		t2q, t2q, t3q
+	veor		\rq, \rq, t0q
+	veor		\rq, \rq, t2q
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	vmull.p64	T1, XL_L, MASK
+
+	veor		XH_L, XH_L, XM_H
+	vext.8		T1, T1, T1, #8
+	veor		XL_H, XL_H, XM_L
+	veor		T1, T1, XL
+
+	vmull.p64	XL, T1_H, MASK
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	veor		XL_H, XL_H, XM_L
+	veor		XH_L, XH_L, XM_H
+
+	vshl.i64	T1, XL, #57
+	vshl.i64	T2, XL, #62
+	veor		T1, T1, T2
+	vshl.i64	T2, XL, #63
+	veor		T1, T1, T2
+	veor		XL_H, XL_H, T1_L
+	veor		XH_L, XH_L, T1_H
+
+	vshr.u64	T1, XL, #1
+	veor		XH, XH, XL
+	veor		XL, XL, T1
+	vshr.u64	T1, T1, #6
+	vshr.u64	XL, XL, #1
+	.endm
+
+	.macro		ghash_update, pn
 	vld1.64		{XL}, [r1]
-	vmov.i8		MASK, #0xe1
-	vext.8		SHASH2, SHASH, SHASH, #8
-	vshl.u64	MASK, MASK, #57
-	veor		SHASH2, SHASH2, SHASH
 
 	/* do the head block first, if supplied */
 	ldr		ip, [sp]
@@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update)
 #ifndef CONFIG_CPU_BIG_ENDIAN
 	vrev64.8	T1, T1
 #endif
-	vext.8		T2, XL, XL, #8
 	vext.8		IN1, T1, T1, #8
-	veor		T1, T1, T2
+	veor		T1_L, T1_L, XL_H
 	veor		XL, XL, IN1
 
-	vmull.p64	XH, SHASH_H, XL_H		@ a1 * b1
+	__pmull_\pn	XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h	@ a1 * b1
 	veor		T1, T1, XL
-	vmull.p64	XL, SHASH_L, XL_L		@ a0 * b0
-	vmull.p64	XM, SHASH2_L, T1_L		@ (a1 + a0)(b1 + b0)
+	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
+	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
 
-	vext.8		T1, XL, XH, #8
-	veor		T2, XL, XH
+	veor		T1, XL, XH
 	veor		XM, XM, T1
-	veor		XM, XM, T2
-	vmull.p64	T2, XL_L, MASK_L
 
-	vmov		XH_L, XM_H
-	vmov		XM_H, XL_L
+	__pmull_reduce_\pn
 
-	veor		XL, XM, T2
-	vext.8		T2, XL, XL, #8
-	vmull.p64	XL, XL_L, MASK_L
-	veor		T2, T2, XH
-	veor		XL, XL, T2
+	veor		T1, T1, XH
+	veor		XL, XL, T1
 
 	bne		0b
 
 	vst1.64		{XL}, [r1]
 	bx		lr
-ENDPROC(pmull_ghash_update)
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p64, SHASH_L, SHASH_H
+
+	vmov.i8		MASK, #0xe1
+	vshl.u64	MASK, MASK, #57
+
+	ghash_update	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p8, SHASH_L, SHASH_H
+
+	vext.8		s1l, SHASH_L, SHASH_L, #1
+	vext.8		s2l, SHASH_L, SHASH_L, #2
+	vext.8		s3l, SHASH_L, SHASH_L, #3
+	vext.8		s4l, SHASH_L, SHASH_L, #4
+	vext.8		s1h, SHASH_H, SHASH_H, #1
+	vext.8		s2h, SHASH_H, SHASH_H, #2
+	vext.8		s3h, SHASH_H, SHASH_H, #3
+	vext.8		s4h, SHASH_H, SHASH_H, #4
+
+	vmov.i64	k16, #0xffff
+	vmov.i64	k32, #0xffffffff
+	vmov.i64	k48, #0xffffffffffff
+
+	ghash_update	p8
+ENDPROC(pmull_ghash_update_p8)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 6bac8bea9f1e8..d9bb52cae2ac9 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -22,6 +22,7 @@
 MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -41,8 +42,17 @@ struct ghash_async_ctx {
 	struct cryptd_ahash *cryptd_tfm;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void)
 {
 	int err;
 
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	if (elf_hwcap2 & HWCAP2_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
 		return err;
@@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);

From 03c9a333fef1bb0a67615b686a7342d853f1a460 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:18 +0100
Subject: [PATCH 088/116] crypto: arm64/ghash - add NEON accelerated fallback
 for 64-bit PMULL

Implement a NEON fallback for systems that do support NEON but have
no support for the optional 64x64->128 polynomial multiplication
instruction that is part of the ARMv8 Crypto Extensions. It is based
on the paper "Fast Software Polynomial Multiplication on ARM Processors
Using the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
Ricardo Dahab (https://hal.inria.fr/hal-01506572), but has been reworked
extensively for the AArch64 ISA.

On a low-end core such as the Cortex-A53 found in the Raspberry Pi3, the
NEON based implementation is 4x faster than the table based one, and
is time invariant as well, making it less vulnerable to timing attacks.
When combined with the bit-sliced NEON implementation of AES-CTR, the
AES-GCM performance increases by 2x (from 58 to 29 cycles per byte).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/ghash-ce-core.S | 248 ++++++++++++++++++++++++++----
 arch/arm64/crypto/ghash-ce-glue.c |  40 ++++-
 2 files changed, 252 insertions(+), 36 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index cb22459eba855..11ebf1ae248a1 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -11,31 +11,215 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	SHASH	.req	v0
-	SHASH2	.req	v1
-	T1	.req	v2
-	T2	.req	v3
-	MASK	.req	v4
-	XL	.req	v5
-	XM	.req	v6
-	XH	.req	v7
-	IN1	.req	v7
+	SHASH		.req	v0
+	SHASH2		.req	v1
+	T1		.req	v2
+	T2		.req	v3
+	MASK		.req	v4
+	XL		.req	v5
+	XM		.req	v6
+	XH		.req	v7
+	IN1		.req	v7
+
+	k00_16		.req	v8
+	k32_48		.req	v9
+
+	t3		.req	v10
+	t4		.req	v11
+	t5		.req	v12
+	t6		.req	v13
+	t7		.req	v14
+	t8		.req	v15
+	t9		.req	v16
+
+	perm1		.req	v17
+	perm2		.req	v18
+	perm3		.req	v19
+
+	sh1		.req	v20
+	sh2		.req	v21
+	sh3		.req	v22
+	sh4		.req	v23
+
+	ss1		.req	v24
+	ss2		.req	v25
+	ss3		.req	v26
+	ss4		.req	v27
 
 	.text
 	.arch		armv8-a+crypto
 
-	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
-	 */
-ENTRY(pmull_ghash_update)
+	.macro		__pmull_p64, rd, rn, rm
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.endm
+
+	.macro		__pmull2_p64, rd, rn, rm
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endm
+
+	.macro		__pmull_p8, rq, ad, bd
+	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
+	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
+	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3
+
+	__pmull_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull2_p8, rq, ad, bd
+	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
+	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
+	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3
+
+	__pmull2_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_SHASH2, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
+	.endm
+
+	.macro		__pmull2_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
+	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
+	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
+	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
+	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
+	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
+	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
+	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
+	pmull\t		\rq\().8h, \ad, \bd			// D = A*B
+
+	eor		t3.16b, t3.16b, t4.16b			// L = E + F
+	eor		t5.16b, t5.16b, t6.16b			// M = G + H
+	eor		t7.16b, t7.16b, t8.16b			// N = I + J
+
+	uzp1		t4.2d, t3.2d, t5.2d
+	uzp2		t3.2d, t3.2d, t5.2d
+	uzp1		t6.2d, t7.2d, t9.2d
+	uzp2		t7.2d, t7.2d, t9.2d
+
+	// t3 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t4.16b, t4.16b, t3.16b
+	and		t3.16b, t3.16b, k32_48.16b
+
+	// t7 = (N) (P4 + P5) << 24
+	// t9 = (K) (P6 + P7) << 32
+	eor		t6.16b, t6.16b, t7.16b
+	and		t7.16b, t7.16b, k00_16.16b
+
+	eor		t4.16b, t4.16b, t3.16b
+	eor		t6.16b, t6.16b, t7.16b
+
+	zip2		t5.2d, t4.2d, t3.2d
+	zip1		t3.2d, t4.2d, t3.2d
+	zip2		t9.2d, t6.2d, t7.2d
+	zip1		t7.2d, t6.2d, t7.2d
+
+	ext		t3.16b, t3.16b, t3.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t7.16b, t7.16b, t7.16b, #13
+	ext		t9.16b, t9.16b, t9.16b, #12
+
+	eor		t3.16b, t3.16b, t5.16b
+	eor		t7.16b, t7.16b, t9.16b
+	eor		\rq\().16b, \rq\().16b, t3.16b
+	eor		\rq\().16b, \rq\().16b, t7.16b
+	.endm
+
+	.macro		__pmull_pre_p64
+	movi		MASK.16b, #0xe1
+	shl		MASK.2d, MASK.2d, #57
+	.endm
+
+	.macro		__pmull_pre_p8
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	// prepare the permutation vectors
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	movi		T1.8b, #8
+	dup		perm1.2d, x5
+	eor		perm1.16b, perm1.16b, T1.16b
+	ushr		perm2.2d, perm1.2d, #8
+	ushr		perm3.2d, perm1.2d, #16
+	ushr		T1.2d, perm1.2d, #24
+	sli		perm2.2d, perm1.2d, #56
+	sli		perm3.2d, perm1.2d, #48
+	sli		T1.2d, perm1.2d, #40
+
+	// precompute loop invariants
+	tbl		sh1.16b, {SHASH.16b}, perm1.16b
+	tbl		sh2.16b, {SHASH.16b}, perm2.16b
+	tbl		sh3.16b, {SHASH.16b}, perm3.16b
+	tbl		sh4.16b, {SHASH.16b}, T1.16b
+	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
+	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
+	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
+	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	pmull		T2.1q, XL.1d, MASK.1d
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	eor		XL.16b, XM.16b, T2.16b
+	ext		T2.16b, XL.16b, XL.16b, #8
+	pmull		XL.1q, XL.1d, MASK.1d
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XL.d[1], XM.d[0]
+	mov		XH.d[0], XM.d[1]
+
+	shl		T1.2d, XL.2d, #57
+	shl		T2.2d, XL.2d, #62
+	eor		T2.16b, T2.16b, T1.16b
+	shl		T1.2d, XL.2d, #63
+	eor		T2.16b, T2.16b, T1.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, T2.16b, T1.16b
+
+	mov		XL.d[1], T2.d[0]
+	mov		XH.d[0], T2.d[1]
+
+	ushr		T2.2d, XL.2d, #1
+	eor		XH.16b, XH.16b, XL.16b
+	eor		XL.16b, XL.16b, T2.16b
+	ushr		T2.2d, T2.2d, #6
+	ushr		XL.2d, XL.2d, #1
+	.endm
+
+	.macro		__pmull_ghash, pn
 	ld1		{SHASH.2d}, [x3]
 	ld1		{XL.2d}, [x1]
-	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
+	__pmull_pre_\pn
+
 	/* do the head block first, if supplied */
 	cbz		x4, 0f
 	ld1		{T1.2d}, [x4]
@@ -52,23 +236,17 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T1.16b, T1.16b, T2.16b
 	eor		XL.16b, XL.16b, IN1.16b
 
-	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
+	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
 	eor		T1.16b, T1.16b, XL.16b
-	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
-	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
+	__pmull_\pn 	XL, XL, SHASH			// a0 * b0
+	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)
 
-	ext		T1.16b, XL.16b, XH.16b, #8
 	eor		T2.16b, XL.16b, XH.16b
-	eor		XM.16b, XM.16b, T1.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
 	eor		XM.16b, XM.16b, T2.16b
-	pmull		T2.1q, XL.1d, MASK.1d
 
-	mov		XH.d[0], XM.d[1]
-	mov		XM.d[1], XL.d[0]
+	__pmull_reduce_\pn
 
-	eor		XL.16b, XM.16b, T2.16b
-	ext		T2.16b, XL.16b, XL.16b, #8
-	pmull		XL.1q, XL.1d, MASK.1d
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b
 
@@ -76,7 +254,19 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 
 	st1		{XL.2d}, [x1]
 	ret
-ENDPROC(pmull_ghash_update)
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	__pmull_ghash	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	__pmull_ghash	p8
+ENDPROC(pmull_ghash_update_p8)
 
 	KS		.req	v8
 	CTR		.req	v9
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index ee6aaac05905d..cfc9c92814fd0 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -26,6 +26,7 @@
 MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -48,8 +49,17 @@ struct gcm_aes_ctx {
 	struct ghash_key	ghash_key;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
 
 asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
@@ -557,13 +567,24 @@ static int __init ghash_ce_mod_init(void)
 {
 	int ret;
 
-	ret = crypto_register_aead(&gcm_aes_alg);
-	if (ret)
-		return ret;
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+
+	if (elf_hwcap & HWCAP_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
 
 	ret = crypto_register_shash(&ghash_alg);
 	if (ret)
-		crypto_unregister_aead(&gcm_aes_alg);
+		return ret;
+
+	if (elf_hwcap & HWCAP_PMULL) {
+		ret = crypto_register_aead(&gcm_aes_alg);
+		if (ret)
+			crypto_unregister_shash(&ghash_alg);
+	}
 	return ret;
 }
 
@@ -573,5 +594,10 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_aead(&gcm_aes_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+	{ cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);

From 0d149ce67d4409d7ff13c2d08c3474f3319e1b7e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:19 +0100
Subject: [PATCH 089/116] crypto: arm/aes - avoid expanded lookup tables in the
 final round

For the final round, avoid the expanded and padded lookup tables
exported by the generic AES driver. Instead, for encryption, we can
perform byte loads from the same table we used for the inner rounds,
which will still be hot in the caches. For decryption, use the inverse
AES Sbox directly, which is 4x smaller than the inverse lookup table
exported by the generic driver.

This should significantly reduce the Dcache footprint of our code,
which makes the code more robust against timing attacks. It does not
introduce any additional module dependencies, given that we already
rely on the core AES module for the shared key expansion routines.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/aes-cipher-core.S | 88 +++++++++++++++++++++++--------
 1 file changed, 65 insertions(+), 23 deletions(-)

diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index c817a86c4ca89..54b384084637b 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/cache.h>
 
 	.text
 	.align		5
@@ -32,19 +33,19 @@
 	.endif
 	.endm
 
-	.macro		__load, out, in, idx
+	.macro		__load, out, in, idx, sz, op
 	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
-	ldr		\out, [ttab, \in, lsr #(8 * \idx) - 2]
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
 	.else
-	ldr		\out, [ttab, \in, lsl #2]
+	ldr\op		\out, [ttab, \in, lsl #\sz]
 	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
 	__select	\out0, \in0, 0
 	__select	t0, \in1, 1
-	__load		\out0, \out0, 0
-	__load		t0, t0, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
 
 	.if		\enc
 	__select	\out1, \in1, 0
@@ -53,10 +54,10 @@
 	__select	\out1, \in3, 0
 	__select	t1, \in0, 1
 	.endif
-	__load		\out1, \out1, 0
+	__load		\out1, \out1, 0, \sz, \op
 	__select	t2, \in2, 2
-	__load		t1, t1, 1
-	__load		t2, t2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
 
 	eor		\out0, \out0, t0, ror #24
 
@@ -68,9 +69,9 @@
 	__select	\t3, \in1, 2
 	__select	\t4, \in2, 3
 	.endif
-	__load		\t3, \t3, 2
-	__load		t0, t0, 3
-	__load		\t4, \t4, 3
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
 
 	eor		\out1, \out1, t1, ror #24
 	eor		\out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@
 	eor		\out1, \out1, t2
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
 	.macro		__rev, out, in
@@ -114,7 +115,7 @@
 	.endif
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
+	.macro		do_crypt, round, ttab, ltab, bsz
 	push		{r3-r11, lr}
 
 	ldr		r4, [in]
@@ -146,9 +147,12 @@
 
 1:	subs		rounds, rounds, #4
 	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	__adrl		ttab, \ltab, ls
+	bls		2f
 	\round		r4, r5, r6, r7, r8, r9, r10, r11
-	bhi		0b
+	b		0b
+
+2:	__adrl		ttab, \ltab
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
@@ -170,10 +174,48 @@
 	.ltorg
 	.endm
 
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
 ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm_encrypt)
 
+	.align		5
 ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)

From 7c83d689c71fd6ca8829d4a34416685938368c13 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Jul 2017 11:28:20 +0100
Subject: [PATCH 090/116] crypto: arm64/aes - avoid expanded lookup tables in
 the final round

For the final round, avoid the expanded and padded lookup tables
exported by the generic AES driver. Instead, for encryption, we can
perform byte loads from the same table we used for the inner rounds,
which will still be hot in the caches. For decryption, use the inverse
AES Sbox directly, which is 4x smaller than the inverse lookup table
exported by the generic driver.

This should significantly reduce the Dcache footprint of our code,
which makes the code more robust against timing attacks. It does not
introduce any additional module dependencies, given that we already
rely on the core AES module for the shared key expansion routines.
It also frees up register x18, which is not available as a scratch
register on all platforms, which and so avoiding it improves
shareability of this code.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-cipher-core.S | 152 ++++++++++++++++++++--------
 1 file changed, 107 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index f2f9cc519309c..6d2445d603cc2 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 
 	.text
 
@@ -17,94 +18,155 @@
 	out		.req	x1
 	in		.req	x2
 	rounds		.req	x3
-	tt		.req	x4
-	lt		.req	x2
+	tt		.req	x2
 
-	.macro		__pair, enc, reg0, reg1, in0, in1e, in1d, shift
+	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	.ifc		\op\shift, b0
+	ubfiz		\reg0, \in0, #2, #8
+	ubfiz		\reg1, \in1e, #2, #8
+	.else
 	ubfx		\reg0, \in0, #\shift, #8
-	.if		\enc
 	ubfx		\reg1, \in1e, #\shift, #8
-	.else
-	ubfx		\reg1, \in1d, #\shift, #8
 	.endif
+
+	/*
+	 * AArch64 cannot do byte size indexed loads from a table containing
+	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+	 * valid instruction. So perform the shift explicitly first for the
+	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
+	 * than ubfx above)
+	 */
+	.ifnc		\op, b
 	ldr		\reg0, [tt, \reg0, uxtw #2]
 	ldr		\reg1, [tt, \reg1, uxtw #2]
+	.else
+	.if		\shift > 0
+	lsl		\reg0, \reg0, #2
+	lsl		\reg1, \reg1, #2
+	.endif
+	ldrb		\reg0, [tt, \reg0, uxtw]
+	ldrb		\reg1, [tt, \reg1, uxtw]
+	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	ubfx		\reg0, \in0, #\shift, #8
+	ubfx		\reg1, \in1d, #\shift, #8
+	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
+	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
 	ldp		\out0, \out1, [rk], #8
 
-	__pair		\enc, w13, w14, \in0, \in1, \in3, 0
-	__pair		\enc, w15, w16, \in1, \in2, \in0, 8
-	__pair		\enc, w17, w18, \in2, \in3, \in1, 16
-	__pair		\enc, \t0, \t1, \in3, \in0, \in2, 24
-
-	eor		\out0, \out0, w13
-	eor		\out1, \out1, w14
-	eor		\out0, \out0, w15, ror #24
-	eor		\out1, \out1, w16, ror #24
-	eor		\out0, \out0, w17, ror #16
-	eor		\out1, \out1, w18, ror #16
+	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
+	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
+	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
+	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+	eor		\out0, \out0, w12
+	eor		\out1, \out1, w13
+	eor		\out0, \out0, w14, ror #24
+	eor		\out1, \out1, w15, ror #24
+	eor		\out0, \out0, w16, ror #16
+	eor		\out1, \out1, w17, ror #16
 	eor		\out0, \out0, \t0, ror #8
 	eor		\out1, \out1, \t1, ror #8
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
-	ldp		w5, w6, [in]
-	ldp		w7, w8, [in, #8]
-	ldp		w9, w10, [rk], #16
-	ldp		w11, w12, [rk, #-8]
+	.macro		do_crypt, round, ttab, ltab, bsz
+	ldp		w4, w5, [in]
+	ldp		w6, w7, [in, #8]
+	ldp		w8, w9, [rk], #16
+	ldp		w10, w11, [rk, #-8]
 
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
+	eor		w4, w4, w8
 	eor		w5, w5, w9
 	eor		w6, w6, w10
 	eor		w7, w7, w11
-	eor		w8, w8, w12
 
 	adr_l		tt, \ttab
-	adr_l		lt, \ltab
 
 	tbnz		rounds, #1, 1f
 
-0:	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
+0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	\round		w4, w5, w6, w7, w8, w9, w10, w11
 
 1:	subs		rounds, rounds, #4
-	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	csel		tt, tt, lt, hi
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
-	b.hi		0b
-
+	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	b.ls		3f
+2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
+	b		0b
+3:	adr_l		tt, \ltab
+	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
-	stp		w5, w6, [out]
-	stp		w7, w8, [out, #8]
+	stp		w4, w5, [out]
+	stp		w6, w7, [out, #8]
 	ret
 	.endm
 
-	.align		5
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm64_inverse_sbox, %object
+__aes_arm64_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
+
 ENTRY(__aes_arm64_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm64_encrypt)
 
 	.align		5
 ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
 ENDPROC(__aes_arm64_decrypt)

From e652399edba99a5497f0d80f240c9075d3b43493 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 25 Jul 2017 14:12:11 -0500
Subject: [PATCH 091/116] crypto: ccp - Fix XTS-AES-128 support on v5 CCPs

Version 5 CCPs have some new requirements for XTS-AES: the type field
must be specified, and the key requires 512 bits, with each part
occupying 256 bits and padded with zeroes.

cc: <stable@vger.kernel.org> # 4.9.x+

Signed-off-by: Gary R Hook <ghook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c |  4 ++-
 drivers/crypto/ccp/ccp-dev-v5.c         |  2 ++
 drivers/crypto/ccp/ccp-dev.h            |  2 ++
 drivers/crypto/ccp/ccp-ops.c            | 43 +++++++++++++++++++------
 include/linux/ccp.h                     |  3 +-
 5 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 58a4244b47529..3f26a415ef44b 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -1,8 +1,9 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
+ * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -164,6 +165,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
 	INIT_LIST_HEAD(&rctx->cmd.entry);
 	rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+	rctx->cmd.u.xts.type = CCP_AES_TYPE_128;
 	rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
 					   : CCP_AES_ACTION_DECRYPT;
 	rctx->cmd.u.xts.unit_size = unit_size;
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 6ace6dd5a239c..65604fc65e8f3 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -145,6 +145,7 @@ union ccp_function {
 #define	CCP_AES_MODE(p)		((p)->aes.mode)
 #define	CCP_AES_TYPE(p)		((p)->aes.type)
 #define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
+#define	CCP_XTS_TYPE(p)		((p)->aes_xts.type)
 #define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
 #define	CCP_DES3_SIZE(p)	((p)->des3.size)
 #define	CCP_DES3_ENCRYPT(p)	((p)->des3.encrypt)
@@ -344,6 +345,7 @@ static int ccp5_perform_xts_aes(struct ccp_op *op)
 	CCP5_CMD_PROT(&desc) = 0;
 
 	function.raw = 0;
+	CCP_XTS_TYPE(&function) = op->u.xts.type;
 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 1f6deee117a73..6810b65c1939c 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -194,6 +194,7 @@
 #define CCP_AES_CTX_SB_COUNT		1
 
 #define CCP_XTS_AES_KEY_SB_COUNT	1
+#define CCP5_XTS_AES_KEY_SB_COUNT	2
 #define CCP_XTS_AES_CTX_SB_COUNT	1
 
 #define CCP_DES3_KEY_SB_COUNT		1
@@ -498,6 +499,7 @@ struct ccp_aes_op {
 };
 
 struct ccp_xts_aes_op {
+	enum ccp_aes_type type;
 	enum ccp_aes_action action;
 	enum ccp_xts_aes_unit_size unit_size;
 };
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 40c062ad87263..10e3be8d93ced 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1038,6 +1038,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	struct ccp_op op;
 	unsigned int unit_size, dm_offset;
 	bool in_place = false;
+	unsigned int sb_count;
+	enum ccp_aes_type aestype;
 	int ret;
 
 	switch (xts->unit_size) {
@@ -1061,7 +1063,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 		return -EINVAL;
 	}
 
-	if (xts->key_len != AES_KEYSIZE_128)
+	if (xts->key_len == AES_KEYSIZE_128)
+		aestype = CCP_AES_TYPE_128;
+	else
 		return -EINVAL;
 
 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
@@ -1083,23 +1087,44 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	op.sb_key = cmd_q->sb_key;
 	op.sb_ctx = cmd_q->sb_ctx;
 	op.init = 1;
+	op.u.xts.type = aestype;
 	op.u.xts.action = xts->action;
 	op.u.xts.unit_size = xts->unit_size;
 
-	/* All supported key sizes fit in a single (32-byte) SB entry
-	 * and must be in little endian format. Use the 256-bit byte
-	 * swap passthru option to convert from big endian to little
-	 * endian.
+	/* A version 3 device only supports 128-bit keys, which fits into a
+	 * single SB entry. A version 5 device uses a 512-bit vector, so two
+	 * SB entries.
 	 */
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
+	else
+		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
 	ret = ccp_init_dm_workarea(&key, cmd_q,
-				   CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
+				   sb_count * CCP_SB_BYTES,
 				   DMA_TO_DEVICE);
 	if (ret)
 		return ret;
 
-	dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
-	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
-	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
+		/* All supported key sizes must be in little endian format.
+		 * Use the 256-bit byte swap passthru option to convert from
+		 * big endian to little endian.
+		 */
+		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
+		ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
+		ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
+	} else {
+		/* Version 5 CCPs use a 512-bit space for the key: each portion
+		 * occupies 256 bits, or one entire slot, and is zero-padded.
+		 */
+		unsigned int pad;
+
+		dm_offset = CCP_SB_BYTES;
+		pad = dm_offset - xts->key_len;
+		ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
+		ccp_set_dm_area(&key, dm_offset + pad, xts->key, xts->key_len,
+				xts->key_len);
+	}
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index 3fd9a6dac3441..7e9c991c95e03 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -229,6 +229,7 @@ enum ccp_xts_aes_unit_size {
  * AES operation the new IV overwrites the old IV.
  */
 struct ccp_xts_aes_engine {
+	enum ccp_aes_type type;
 	enum ccp_aes_action action;
 	enum ccp_xts_aes_unit_size unit_size;
 

From 47f27f160be6d26d61d3e75923e635e4e6c099fb Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 25 Jul 2017 14:21:23 -0500
Subject: [PATCH 092/116] crypto: ccp - Add a call to xts_check_key()

Vet the key using the available standard function

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 3f26a415ef44b..2b5d3a62fad90 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <crypto/aes.h>
+#include <crypto/xts.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 
@@ -97,7 +98,13 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
 static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 			      unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+	struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+	int ret;
+
+	ret = xts_check_key(xfm, key, key_len);
+	if (ret)
+		return ret;
 
 	/* Only support 128-bit AES key with a 128-bit Tweak key,
 	 * otherwise use the fallback

From 7f7216cfaf3f07a41fe6e88b779bdc9690c2d515 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 25 Jul 2017 14:21:34 -0500
Subject: [PATCH 093/116] crypto: ccp - Rework the unit-size check for XTS-AES

The CCP supports a limited set of unit-size values. Change the check
for this parameter such that acceptable values match the enumeration.
Then clarify the conditions under which we must use the fallback
implementation.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c | 57 +++++++++----------------
 1 file changed, 20 insertions(+), 37 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 2b5d3a62fad90..5c2df880ab488 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -39,46 +39,26 @@ struct ccp_unit_size_map {
 	u32 value;
 };
 
-static struct ccp_unit_size_map unit_size_map[] = {
+static struct ccp_unit_size_map xts_unit_sizes[] = {
 	{
-		.size	= 4096,
-		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
-	},
-	{
-		.size	= 2048,
-		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
-	},
-	{
-		.size	= 1024,
-		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
+		.size   = 16,
+		.value	= CCP_XTS_AES_UNIT_SIZE_16,
 	},
 	{
-		.size	= 512,
+		.size   = 512,
 		.value	= CCP_XTS_AES_UNIT_SIZE_512,
 	},
 	{
-		.size	= 256,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 128,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 64,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 32,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+		.size   = 1024,
+		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
 	},
 	{
-		.size	= 16,
-		.value	= CCP_XTS_AES_UNIT_SIZE_16,
+		.size   = 2048,
+		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
 	},
 	{
-		.size	= 1,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+		.size   = 4096,
+		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
 	},
 };
 
@@ -138,16 +118,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	if (!req->info)
 		return -EINVAL;
 
+	/* Check conditions under which the CCP can fulfill a request. The
+	 * device can handle input plaintext of a length that is a multiple
+	 * of the unit_size, bug the crypto implementation only supports
+	 * the unit_size being equal to the input length. This limits the
+	 * number of scenarios we can handle.
+	 */
 	unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
-	if (req->nbytes <= unit_size_map[0].size) {
-		for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) {
-			if (!(req->nbytes & (unit_size_map[unit].size - 1))) {
-				unit_size = unit_size_map[unit].value;
-				break;
-			}
+	for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) {
+		if (req->nbytes == xts_unit_sizes[unit].size) {
+			unit_size = unit;
+			break;
 		}
 	}
-
 	if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
 	    (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
 		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);

From 5060ffc97bc6fdd31595b3ecbc027b0a8df94b5c Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 25 Jul 2017 14:21:43 -0500
Subject: [PATCH 094/116] crypto: ccp - Add XTS-AES-256 support for CCP version
 5

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c | 26 +++++++++++++++++++++----
 drivers/crypto/ccp/ccp-crypto.h         |  2 +-
 drivers/crypto/ccp/ccp-ops.c            |  2 ++
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 5c2df880ab488..94b5bcf5b628a 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -80,19 +80,24 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 {
 	struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
 	struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+	unsigned int ccpversion = ccp_version();
 	int ret;
 
 	ret = xts_check_key(xfm, key, key_len);
 	if (ret)
 		return ret;
 
-	/* Only support 128-bit AES key with a 128-bit Tweak key,
-	 * otherwise use the fallback
+	/* Version 3 devices support 128-bit keys; version 5 devices can
+	 * accommodate 128- and 256-bit keys.
 	 */
 	switch (key_len) {
 	case AES_KEYSIZE_128 * 2:
 		memcpy(ctx->u.aes.key, key, key_len);
 		break;
+	case AES_KEYSIZE_256 * 2:
+		if (ccpversion > CCP_VERSION(3, 0))
+			memcpy(ctx->u.aes.key, key, key_len);
+		break;
 	}
 	ctx->u.aes.key_len = key_len / 2;
 	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
@@ -105,6 +110,8 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
 	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	unsigned int ccpversion = ccp_version();
+	unsigned int fallback = 0;
 	unsigned int unit;
 	u32 unit_size;
 	int ret;
@@ -131,8 +138,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 			break;
 		}
 	}
-	if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
-	    (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
+	/* The CCP has restrictions on block sizes. Also, a version 3 device
+	 * only supports AES-128 operations; version 5 CCPs support both
+	 * AES-128 and -256 operations.
+	 */
+	if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST)
+		fallback = 1;
+	if ((ccpversion < CCP_VERSION(5, 0)) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_128))
+		fallback = 1;
+	if ((ctx->u.aes.key_len != AES_KEYSIZE_128) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_256))
+		fallback = 1;
+	if (fallback) {
 		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);
 
 		/* Use the fallback to process the request for any
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 67c7620029e38..b9fd090c46c25 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -99,7 +99,7 @@ struct ccp_aes_ctx {
 
 	struct scatterlist key_sg;
 	unsigned int key_len;
-	u8 key[AES_MAX_KEY_SIZE];
+	u8 key[AES_MAX_KEY_SIZE * 2];
 
 	u8 nonce[CTR_RFC3686_NONCE_SIZE];
 
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 10e3be8d93ced..cc16cb0fc3af4 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1065,6 +1065,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 
 	if (xts->key_len == AES_KEYSIZE_128)
 		aestype = CCP_AES_TYPE_128;
+	else if (xts->key_len == AES_KEYSIZE_256)
+		aestype = CCP_AES_TYPE_256;
 	else
 		return -EINVAL;
 

From 5703c826b758e0b33c998739af093879979315b8 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Sun, 30 Jul 2017 14:31:18 +0200
Subject: [PATCH 095/116] crypto: algif - return error code when no data was
 processed

If no data has been processed during recvmsg, return the error code.
This covers all errors received during non-AIO operations.

If any error occurs during a synchronous operation in addition to
-EIOCBQUEUED or -EBADMSG (like -ENOMEM), it should be relayed to the
caller.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algif_aead.c     | 4 +++-
 crypto/algif_skcipher.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 9755aac0fe26f..2de056c3139cf 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -791,9 +791,11 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		 * only handle one AIO request. If the caller wants to have
 		 * multiple AIO requests in parallel, he must make multiple
 		 * separate AIO calls.
+		 *
+		 * Also return the error if no data has been processed so far.
 		 */
 		if (err <= 0) {
-			if (err == -EIOCBQUEUED || err == -EBADMSG)
+			if (err == -EIOCBQUEUED || err == -EBADMSG || !ret)
 				ret = err;
 			goto out;
 		}
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 968d094f0bcc0..ce3b5fba22799 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -714,9 +714,11 @@ static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 		 * only handle one AIO request. If the caller wants to have
 		 * multiple AIO requests in parallel, he must make multiple
 		 * separate AIO calls.
+		 *
+		 * Also return the error if no data has been processed so far.
 		 */
 		if (err <= 0) {
-			if (err == -EIOCBQUEUED)
+			if (err == -EIOCBQUEUED || !ret)
 				ret = err;
 			goto out;
 		}

From 72548b093ee38a6d4f2a19e6ef1948ae05c181f7 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Sun, 30 Jul 2017 14:32:58 +0200
Subject: [PATCH 096/116] crypto: algif_aead - copy AAD from src to dst

Use the NULL cipher to copy the AAD and PT/CT from the TX SGL
to the RX SGL. This allows an in-place crypto operation on the
RX SGL for encryption, because the TX data is always smaller or
equal to the RX data (the RX data will hold the tag).

For decryption, a per-request TX SGL is created which will only hold
the tag value. As the RX SGL will have no space for the tag value and
an in-place operation will not write the tag buffer, the TX SGL with the
tag value is chained to the RX SGL. This now allows an in-place
crypto operation.

For example:

* without the patch:
kcapi -x 2 -e -c "gcm(aes)" -p 89154d0d4129d322e4487bafaa4f6b46 -k c0ece3e63198af382b5603331cc23fa8 -i 7e489b83622e7228314d878d -a afcd7202d621e06ca53b70c2bdff7fb2 -l 16 -u -s
00000000000000000000000000000000f4a3eacfbdadd3b1a17117b1d67ffc1f1e21efbbc6d83724a8c296e3bb8cda0c

* with the patch:
kcapi -x 2 -e -c "gcm(aes)" -p 89154d0d4129d322e4487bafaa4f6b46 -k c0ece3e63198af382b5603331cc23fa8 -i 7e489b83622e7228314d878d -a afcd7202d621e06ca53b70c2bdff7fb2 -l 16 -u -s
afcd7202d621e06ca53b70c2bdff7fb2f4a3eacfbdadd3b1a17117b1d67ffc1f1e21efbbc6d83724a8c296e3bb8cda0c

Tests covering this functionality have been added to libkcapi.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig      |   2 +
 crypto/algif_aead.c | 183 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 162 insertions(+), 23 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index caa770e535a2e..0a121f9ddf8e3 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1753,6 +1753,8 @@ config CRYPTO_USER_API_AEAD
 	tristate "User-space interface for AEAD cipher algorithms"
 	depends on NET
 	select CRYPTO_AEAD
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_NULL
 	select CRYPTO_USER_API
 	help
 	  This option enables the user-spaces interface for AEAD
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 2de056c3139cf..1f0696dd64f41 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -30,6 +30,8 @@
 #include <crypto/internal/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/if_alg.h>
+#include <crypto/skcipher.h>
+#include <crypto/null.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
@@ -70,6 +72,7 @@ struct aead_async_req {
 struct aead_tfm {
 	struct crypto_aead *aead;
 	bool has_key;
+	struct crypto_skcipher *null_tfm;
 };
 
 struct aead_ctx {
@@ -168,7 +171,12 @@ static int aead_alloc_tsgl(struct sock *sk)
 	return 0;
 }
 
-static unsigned int aead_count_tsgl(struct sock *sk, size_t bytes)
+/**
+ * Count number of SG entries from the beginning of the SGL to @bytes. If
+ * an offset is provided, the counting of the SG entries starts at the offset.
+ */
+static unsigned int aead_count_tsgl(struct sock *sk, size_t bytes,
+				    size_t offset)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_ctx *ctx = ask->private;
@@ -183,32 +191,55 @@ static unsigned int aead_count_tsgl(struct sock *sk, size_t bytes)
 		struct scatterlist *sg = sgl->sg;
 
 		for (i = 0; i < sgl->cur; i++) {
+			size_t bytes_count;
+
+			/* Skip offset */
+			if (offset >= sg[i].length) {
+				offset -= sg[i].length;
+				bytes -= sg[i].length;
+				continue;
+			}
+
+			bytes_count = sg[i].length - offset;
+
+			offset = 0;
 			sgl_count++;
-			if (sg[i].length >= bytes)
+
+			/* If we have seen requested number of bytes, stop */
+			if (bytes_count >= bytes)
 				return sgl_count;
 
-			bytes -= sg[i].length;
+			bytes -= bytes_count;
 		}
 	}
 
 	return sgl_count;
 }
 
+/**
+ * Release the specified buffers from TX SGL pointed to by ctx->tsgl_list for
+ * @used bytes.
+ *
+ * If @dst is non-null, reassign the pages to dst. The caller must release
+ * the pages. If @dst_offset is given only reassign the pages to @dst starting
+ * at the @dst_offset (byte). The caller must ensure that @dst is large
+ * enough (e.g. by using aead_count_tsgl with the same offset).
+ */
 static void aead_pull_tsgl(struct sock *sk, size_t used,
-			   struct scatterlist *dst)
+			   struct scatterlist *dst, size_t dst_offset)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_ctx *ctx = ask->private;
 	struct aead_tsgl *sgl;
 	struct scatterlist *sg;
-	unsigned int i;
+	unsigned int i, j;
 
 	while (!list_empty(&ctx->tsgl_list)) {
 		sgl = list_first_entry(&ctx->tsgl_list, struct aead_tsgl,
 				       list);
 		sg = sgl->sg;
 
-		for (i = 0; i < sgl->cur; i++) {
+		for (i = 0, j = 0; i < sgl->cur; i++) {
 			size_t plen = min_t(size_t, used, sg[i].length);
 			struct page *page = sg_page(sg + i);
 
@@ -219,8 +250,20 @@ static void aead_pull_tsgl(struct sock *sk, size_t used,
 			 * Assumption: caller created aead_count_tsgl(len)
 			 * SG entries in dst.
 			 */
-			if (dst)
-				sg_set_page(dst + i, page, plen, sg[i].offset);
+			if (dst) {
+				if (dst_offset >= plen) {
+					/* discard page before offset */
+					dst_offset -= plen;
+					put_page(page);
+				} else {
+					/* reassign page to dst after offset */
+					sg_set_page(dst + j, page,
+						    plen - dst_offset,
+						    sg[i].offset + dst_offset);
+					dst_offset = 0;
+					j++;
+				}
+			}
 
 			sg[i].length -= plen;
 			sg[i].offset += plen;
@@ -233,6 +276,7 @@ static void aead_pull_tsgl(struct sock *sk, size_t used,
 
 			if (!dst)
 				put_page(page);
+
 			sg_assign_page(sg + i, NULL);
 		}
 
@@ -583,6 +627,20 @@ static void aead_async_cb(struct crypto_async_request *_req, int err)
 	release_sock(sk);
 }
 
+static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+				struct scatterlist *src,
+				struct scatterlist *dst, unsigned int len)
+{
+	SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
+
+	skcipher_request_set_tfm(skreq, null_tfm);
+	skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      NULL, NULL);
+	skcipher_request_set_crypt(skreq, src, dst, len, NULL);
+
+	return crypto_skcipher_encrypt(skreq);
+}
+
 static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 			 size_t ignored, int flags)
 {
@@ -593,11 +651,14 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct aead_ctx *ctx = ask->private;
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
+	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
 	unsigned int as = crypto_aead_authsize(tfm);
 	unsigned int areqlen =
 		sizeof(struct aead_async_req) + crypto_aead_reqsize(tfm);
 	struct aead_async_req *areq;
 	struct aead_rsgl *last_rsgl = NULL;
+	struct aead_tsgl *tsgl;
+	struct scatterlist *src;
 	int err = 0;
 	size_t used = 0;		/* [in]  TX bufs to be en/decrypted */
 	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
@@ -716,25 +777,91 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		outlen -= less;
 	}
 
+	processed = used + ctx->aead_assoclen;
+	tsgl = list_first_entry(&ctx->tsgl_list, struct aead_tsgl, list);
+
 	/*
-	 * Create a per request TX SGL for this request which tracks the
-	 * SG entries from the global TX SGL.
+	 * Copy of AAD from source to destination
+	 *
+	 * The AAD is copied to the destination buffer without change. Even
+	 * when user space uses an in-place cipher operation, the kernel
+	 * will copy the data as it does not see whether such in-place operation
+	 * is initiated.
+	 *
+	 * To ensure efficiency, the following implementation ensure that the
+	 * ciphers are invoked to perform a crypto operation in-place. This
+	 * is achieved by memory management specified as follows.
 	 */
-	processed = used + ctx->aead_assoclen;
-	areq->tsgl_entries = aead_count_tsgl(sk, processed);
-	if (!areq->tsgl_entries)
-		areq->tsgl_entries = 1;
-	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
-				  GFP_KERNEL);
-	if (!areq->tsgl) {
-		err = -ENOMEM;
-		goto free;
+
+	/* Use the RX SGL as source (and destination) for crypto op. */
+	src = areq->first_rsgl.sgl.sg;
+
+	if (ctx->enc) {
+		/*
+		 * Encryption operation - The in-place cipher operation is
+		 * achieved by the following operation:
+		 *
+		 * TX SGL: AAD || PT || Tag
+		 *	    |	   |
+		 *	    | copy |
+		 *	    v	   v
+		 * RX SGL: AAD || PT
+		 */
+		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+					   areq->first_rsgl.sgl.sg, processed);
+		if (err)
+			goto free;
+		aead_pull_tsgl(sk, processed, NULL, 0);
+	} else {
+		/*
+		 * Decryption operation - To achieve an in-place cipher
+		 * operation, the following  SGL structure is used:
+		 *
+		 * TX SGL: AAD || CT || Tag
+		 *	    |	   |	 ^
+		 *	    | copy |	 | Create SGL link.
+		 *	    v	   v	 |
+		 * RX SGL: AAD || CT ----+
+		 */
+
+		 /* Copy AAD || CT to RX SGL buffer for in-place operation. */
+		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+					   areq->first_rsgl.sgl.sg, outlen);
+		if (err)
+			goto free;
+
+		/* Create TX SGL for tag and chain it to RX SGL. */
+		areq->tsgl_entries = aead_count_tsgl(sk, processed,
+						     processed - as);
+		if (!areq->tsgl_entries)
+			areq->tsgl_entries = 1;
+		areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) *
+					      areq->tsgl_entries,
+					  GFP_KERNEL);
+		if (!areq->tsgl) {
+			err = -ENOMEM;
+			goto free;
+		}
+		sg_init_table(areq->tsgl, areq->tsgl_entries);
+
+		/* Release TX SGL, except for tag data and reassign tag data. */
+		aead_pull_tsgl(sk, processed, areq->tsgl, processed - as);
+
+		/* chain the areq TX SGL holding the tag with RX SGL */
+		if (last_rsgl) {
+			/* RX SGL present */
+			struct af_alg_sgl *sgl_prev = &last_rsgl->sgl;
+
+			sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
+			sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
+				 areq->tsgl);
+		} else
+			/* no RX SGL present (e.g. authentication only) */
+			src = areq->tsgl;
 	}
-	sg_init_table(areq->tsgl, areq->tsgl_entries);
-	aead_pull_tsgl(sk, processed, areq->tsgl);
 
 	/* Initialize the crypto operation */
-	aead_request_set_crypt(&areq->aead_req, areq->tsgl,
+	aead_request_set_crypt(&areq->aead_req, src,
 			       areq->first_rsgl.sgl.sg, used, ctx->iv);
 	aead_request_set_ad(&areq->aead_req, ctx->aead_assoclen);
 	aead_request_set_tfm(&areq->aead_req, tfm);
@@ -951,6 +1078,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 {
 	struct aead_tfm *tfm;
 	struct crypto_aead *aead;
+	struct crypto_skcipher *null_tfm;
 
 	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
 	if (!tfm)
@@ -962,7 +1090,15 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 		return ERR_CAST(aead);
 	}
 
+	null_tfm = crypto_get_default_null_skcipher2();
+	if (IS_ERR(null_tfm)) {
+		crypto_free_aead(aead);
+		kfree(tfm);
+		return ERR_CAST(null_tfm);
+	}
+
 	tfm->aead = aead;
+	tfm->null_tfm = null_tfm;
 
 	return tfm;
 }
@@ -1003,7 +1139,8 @@ static void aead_sock_destruct(struct sock *sk)
 	struct crypto_aead *tfm = aeadc->aead;
 	unsigned int ivlen = crypto_aead_ivsize(tfm);
 
-	aead_pull_tsgl(sk, ctx->used, NULL);
+	aead_pull_tsgl(sk, ctx->used, NULL, 0);
+	crypto_put_default_null_skcipher2();
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);

From c871c10e4ea763e76e30f6ecb64dc9938ae2e6b9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 31 Jul 2017 22:43:55 +0200
Subject: [PATCH 097/116] crypto: serpent - improve __serpent_setkey with UBSAN

When UBSAN is enabled, we get a very large stack frame for
__serpent_setkey, when the register allocator ends up using more registers
than it has, and has to spill temporary values to the stack. The code
was originally optimized for in-order x86-32 CPU implementations using
older compilers, but it now runs into a highly suboptimal case on all
CPU architectures, as seen by this warning:

crypto/serpent_generic.c: In function '__serpent_setkey':
crypto/serpent_generic.c:436:1: error: the frame size of 2720 bytes is larger than 2048 bytes [-Werror=frame-larger-than=]

Disabling -fsanitize=alignment would avoid that warning, presumably the
option turns off a optimization step that is required for getting the
register allocation right, but there is no easy way to do that on gcc-7
(gcc-8 introduces a function attribute for this).

I tried to figure out a way to modify the source code instead, and noticed
that the two stages of the setkey() function (keyiter and sbox) each are
fine by themselves, but not when combined into one function. Splitting
out the entire sbox into a separate function also happens to work fine
with all compilers I tried (arm, arm64 and x86).

The setkey function uses a strange way to handle offsets into the key
array, using both negative and positive index values, as well as adjusting
the array pointer back and forth. I have checked that this actually
makes no difference to modern compilers, but I left that untouched
to make the patch easier to review and to keep the code closer to
the reference implementation.

Link: https://patchwork.kernel.org/patch/9189575/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/serpent_generic.c | 77 +++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index 94970a794975a..7c3382facc82e 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -229,6 +229,46 @@
 	x4 ^= x2;					\
 	})
 
+static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k)
+{
+	k += 100;
+	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
+	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
+	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
+	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
+	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
+	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
+	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
+	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
+	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
+	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
+	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
+	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
+	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
+	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
+	k -= 50;
+	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
+	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
+	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
+	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
+	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
+	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
+	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
+	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
+	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
+	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
+	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
+	k -= 50;
+	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
+	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
+	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
+	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
+	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
+	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
+	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
+	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
+}
+
 int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 		     unsigned int keylen)
 {
@@ -395,42 +435,7 @@ int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 	keyiter(k[23], r1, r0, r3, 131, 31);
 
 	/* Apply S-boxes */
-
-	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
-	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
-	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
-	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
-	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
-	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
-	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
-	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
-	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
-	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
-	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
-	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
-	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
-	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
-	k -= 50;
-	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
-	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
-	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
-	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
-	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
-	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
-	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
-	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
-	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
-	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
-	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
-	k -= 50;
-	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
-	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
-	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
-	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
-	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
-	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
-	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
-	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
+	__serpent_setkey_sbox(r0, r1, r2, r3, r4, ctx->expkey);
 
 	return 0;
 }

From d634baea628af60d20a82db911b9dc99a5f16244 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 31 Jul 2017 22:49:21 +0200
Subject: [PATCH 098/116] crypto: ccp - avoid uninitialized variable warning

The added support for version 5 CCPs introduced a false-positive
warning in the RSA implementation:

drivers/crypto/ccp/ccp-ops.c: In function 'ccp_run_rsa_cmd':
drivers/crypto/ccp/ccp-ops.c:1856:3: error: 'sb_count' may be used uninitialized in this function [-Werror=maybe-uninitialized]

This changes the code in a way that should make it easier for
the compiler to track the state of the sb_count variable, and
avoid the warning.

Fixes: 6ba46c7d4d7e ("crypto: ccp - Fix base RSA function for version 5 CCPs")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-ops.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index cc16cb0fc3af4..804924e48c898 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1785,6 +1785,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	o_len = 32 * ((rsa->key_size + 255) / 256);
 	i_len = o_len * 2;
 
+	sb_count = 0;
 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
 		/* sb_count is the number of storage block slots required
 		 * for the modulus.
@@ -1879,7 +1880,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	ccp_dm_free(&exp);
 
 e_sb:
-	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
+	if (sb_count)
 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
 	return ret;

From ac360faf955125d362128835e479da0e2646b341 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 31 Jul 2017 23:10:57 +0200
Subject: [PATCH 099/116] crypto: ccp - select CONFIG_CRYPTO_RSA

Without the base RSA code, we run into a link error:

ERROR: "rsa_parse_pub_key" [drivers/crypto/ccp/ccp-crypto.ko] undefined!
ERROR: "rsa_parse_priv_key" [drivers/crypto/ccp/ccp-crypto.ko] undefined!

Like the other drivers implementing RSA in hardware, this
can be avoided by always enabling the base support when we build
CCP.

Fixes: ceeec0afd684 ("crypto: ccp - Add support for RSA on the CCP")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 15b63fd3d180d..6d626606b9c51 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -27,6 +27,7 @@ config CRYPTO_DEV_CCP_CRYPTO
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AUTHENC
+	select CRYPTO_RSA
 	help
 	  Support for using the cryptographic API with the AMD Cryptographic
 	  Coprocessor. This module supports offload of SHA and AES algorithms.

From a92f7af3854ce6b80a4cd7e3df6148663f15671b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Tue, 1 Aug 2017 10:45:01 -0300
Subject: [PATCH 100/116] crypto: caam - Remove unused dentry members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most of the dentry members from structure caam_drv_private
are never used at all, so it is safe to remove them.

Since debugfs_remove_recursive() is called, we don't need the
file entries.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Acked-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/ctrl.c   | 81 +++++++++++++-----------------------
 drivers/crypto/caam/intern.h |  8 ----
 drivers/crypto/caam/qi.c     |  6 +--
 3 files changed, 32 insertions(+), 63 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index fdbcba13824c2..dacb53fb690e9 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -747,59 +747,38 @@ static int caam_probe(struct platform_device *pdev)
 		 caam_dpaa2 ? "yes" : "no");
 
 #ifdef CONFIG_DEBUG_FS
-
-	ctrlpriv->ctl_rq_dequeued =
-		debugfs_create_file("rq_dequeued",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->req_dequeued,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_enc_req =
-		debugfs_create_file("ob_rq_encrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_enc_req,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_dec_req =
-		debugfs_create_file("ib_rq_decrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_dec_req,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_enc_bytes =
-		debugfs_create_file("ob_bytes_encrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_enc_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_prot_bytes =
-		debugfs_create_file("ob_bytes_protected",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_prot_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_dec_bytes =
-		debugfs_create_file("ib_bytes_decrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_dec_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_valid_bytes =
-		debugfs_create_file("ib_bytes_validated",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_valid_bytes,
-				    &caam_fops_u64_ro);
+	debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->req_dequeued,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_rq_encrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_enc_req,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_rq_decrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_dec_req,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_bytes_encrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_enc_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_bytes_protected", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_prot_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_bytes_decrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_dec_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_bytes_validated", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_valid_bytes,
+			    &caam_fops_u64_ro);
 
 	/* Controller level - global status values */
-	ctrlpriv->ctl_faultaddr =
-		debugfs_create_file("fault_addr",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->faultaddr,
-				    &caam_fops_u32_ro);
-	ctrlpriv->ctl_faultdetail =
-		debugfs_create_file("fault_detail",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->faultdetail,
-				    &caam_fops_u32_ro);
-	ctrlpriv->ctl_faultstatus =
-		debugfs_create_file("fault_status",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->status,
-				    &caam_fops_u32_ro);
+	debugfs_create_file("fault_addr", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->faultaddr,
+			    &caam_fops_u32_ro);
+	debugfs_create_file("fault_detail", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->faultdetail,
+			    &caam_fops_u32_ro);
+	debugfs_create_file("fault_status", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->status,
+			    &caam_fops_u32_ro);
 
 	/* Internal covering keys (useful in non-secure mode only) */
 	ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0];
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 9e3f3e0a7ffa1..a52361258d3a0 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -102,16 +102,8 @@ struct caam_drv_private {
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dfs_root;
 	struct dentry *ctl; /* controller dir */
-	struct dentry *ctl_rq_dequeued, *ctl_ob_enc_req, *ctl_ib_dec_req;
-	struct dentry *ctl_ob_enc_bytes, *ctl_ob_prot_bytes;
-	struct dentry *ctl_ib_dec_bytes, *ctl_ib_valid_bytes;
-	struct dentry *ctl_faultaddr, *ctl_faultdetail, *ctl_faultstatus;
-
 	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
 	struct dentry *ctl_kek, *ctl_tkek, *ctl_tdsk;
-#ifdef CONFIG_CAAM_QI
-	struct dentry *qi_congested;
-#endif
 #endif
 };
 
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 9a4151ac5e37c..e4cf000142331 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -791,10 +791,8 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	/* Done with the CGRs; restore the cpus allowed mask */
 	set_cpus_allowed_ptr(current, &old_cpumask);
 #ifdef CONFIG_DEBUG_FS
-	ctrlpriv->qi_congested = debugfs_create_file("qi_congested", 0444,
-						     ctrlpriv->ctl,
-						     &times_congested,
-						     &caam_fops_u64_ro);
+	debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
+			    &times_congested, &caam_fops_u64_ro);
 #endif
 	dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
 	return 0;

From 2d97591ef43d0587be22ad1b0d758d6df4999a0b Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Wed, 2 Aug 2017 07:56:19 +0200
Subject: [PATCH 101/116] crypto: af_alg - consolidation of duplicate code

Consolidate following data structures:

skcipher_async_req, aead_async_req -> af_alg_async_req
skcipher_rsgl, aead_rsql -> af_alg_rsgl
skcipher_tsgl, aead_tsql -> af_alg_tsgl
skcipher_ctx, aead_ctx -> af_alg_ctx

Consolidate following functions:

skcipher_sndbuf, aead_sndbuf -> af_alg_sndbuf
skcipher_writable, aead_writable -> af_alg_writable
skcipher_rcvbuf, aead_rcvbuf -> af_alg_rcvbuf
skcipher_readable, aead_readable -> af_alg_readable
aead_alloc_tsgl, skcipher_alloc_tsgl -> af_alg_alloc_tsgl
aead_count_tsgl, skcipher_count_tsgl -> af_alg_count_tsgl
aead_pull_tsgl, skcipher_pull_tsgl -> af_alg_pull_tsgl
aead_free_areq_sgls, skcipher_free_areq_sgls -> af_alg_free_areq_sgls
aead_wait_for_wmem, skcipher_wait_for_wmem -> af_alg_wait_for_wmem
aead_wmem_wakeup, skcipher_wmem_wakeup -> af_alg_wmem_wakeup
aead_wait_for_data, skcipher_wait_for_data -> af_alg_wait_for_data
aead_data_wakeup, skcipher_data_wakeup -> af_alg_data_wakeup
aead_sendmsg, skcipher_sendmsg -> af_alg_sendmsg
aead_sendpage, skcipher_sendpage -> af_alg_sendpage
aead_async_cb, skcipher_async_cb -> af_alg_async_cb
aead_poll, skcipher_poll -> af_alg_poll

Split out the following common code from recvmsg:

af_alg_alloc_areq: allocation of the request data structure for the
cipher operation

af_alg_get_rsgl: creation of the RX SGL anchored in the request data
structure

The following changes to the implementation without affecting the
functionality have been applied to synchronize slightly different code
bases in algif_skcipher and algif_aead:

The wakeup in af_alg_wait_for_data is triggered when either more data
is received or the indicator that more data is to be expected is
released. The first is triggered by user space, the second is
triggered by the kernel upon finishing the processing of data
(i.e. the kernel is ready for more).

af_alg_sendmsg uses size_t in min_t calculation for obtaining len.
Return code determination is consistent with algif_skcipher. The
scope of the variable i is reduced to match algif_aead. The type of the
variable i is switched from int to unsigned int to match algif_aead.

af_alg_sendpage does not contain the superfluous err = 0 from
aead_sendpage.

af_alg_async_cb requires to store the number of output bytes in
areq->outlen before the AIO callback is triggered.

The POLLIN / POLLRDNORM is now set when either not more data is given or
the kernel is supplied with data. This is consistent to the wakeup from
sleep when the kernel waits for data.

The request data structure is extended by the field last_rsgl which
points to the last RX SGL list entry. This shall help recvmsg
implementation to chain the RX SGL to other SG(L)s if needed. It is
currently used by algif_aead which chains the tag SGL to the RX SGL
during decryption.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c         | 693 +++++++++++++++++++++++++++++++++++++++
 crypto/algif_aead.c     | 701 +++-------------------------------------
 crypto/algif_skcipher.c | 638 +++---------------------------------
 include/crypto/if_alg.h | 170 ++++++++++
 4 files changed, 940 insertions(+), 1262 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 92a3d540d9201..d6936c0e08d97 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/rwsem.h>
+#include <linux/sched/signal.h>
 #include <linux/security.h>
 
 struct alg_type_list {
@@ -507,6 +508,698 @@ void af_alg_complete(struct crypto_async_request *req, int err)
 }
 EXPORT_SYMBOL_GPL(af_alg_complete);
 
+/**
+ * af_alg_alloc_tsgl - allocate the TX SGL
+ *
+ * @sk socket of connection to user space
+ * @return: 0 upon success, < 0 upon error
+ */
+int af_alg_alloc_tsgl(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct scatterlist *sg = NULL;
+
+	sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+	if (!list_empty(&ctx->tsgl_list))
+		sg = sgl->sg;
+
+	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
+		sgl = sock_kmalloc(sk, sizeof(*sgl) +
+				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+				   GFP_KERNEL);
+		if (!sgl)
+			return -ENOMEM;
+
+		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
+		sgl->cur = 0;
+
+		if (sg)
+			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+
+		list_add_tail(&sgl->list, &ctx->tsgl_list);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_tsgl);
+
+/**
+ * aead_count_tsgl - Count number of TX SG entries
+ *
+ * The counting starts from the beginning of the SGL to @bytes. If
+ * an offset is provided, the counting of the SG entries starts at the offset.
+ *
+ * @sk socket of connection to user space
+ * @bytes Count the number of SG entries holding given number of bytes.
+ * @offset Start the counting of SG entries from the given offset.
+ * @return Number of TX SG entries found given the constraints
+ */
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl, *tmp;
+	unsigned int i;
+	unsigned int sgl_count = 0;
+
+	if (!bytes)
+		return 0;
+
+	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
+		struct scatterlist *sg = sgl->sg;
+
+		for (i = 0; i < sgl->cur; i++) {
+			size_t bytes_count;
+
+			/* Skip offset */
+			if (offset >= sg[i].length) {
+				offset -= sg[i].length;
+				bytes -= sg[i].length;
+				continue;
+			}
+
+			bytes_count = sg[i].length - offset;
+
+			offset = 0;
+			sgl_count++;
+
+			/* If we have seen requested number of bytes, stop */
+			if (bytes_count >= bytes)
+				return sgl_count;
+
+			bytes -= bytes_count;
+		}
+	}
+
+	return sgl_count;
+}
+EXPORT_SYMBOL_GPL(af_alg_count_tsgl);
+
+/**
+ * aead_pull_tsgl - Release the specified buffers from TX SGL
+ *
+ * If @dst is non-null, reassign the pages to dst. The caller must release
+ * the pages. If @dst_offset is given only reassign the pages to @dst starting
+ * at the @dst_offset (byte). The caller must ensure that @dst is large
+ * enough (e.g. by using af_alg_count_tsgl with the same offset).
+ *
+ * @sk socket of connection to user space
+ * @used Number of bytes to pull from TX SGL
+ * @dst If non-NULL, buffer is reassigned to dst SGL instead of releasing. The
+ *	caller must release the buffers in dst.
+ * @dst_offset Reassign the TX SGL from given offset. All buffers before
+ *	       reaching the offset is released.
+ */
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
+		      size_t dst_offset)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct scatterlist *sg;
+	unsigned int i, j;
+
+	while (!list_empty(&ctx->tsgl_list)) {
+		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
+				       list);
+		sg = sgl->sg;
+
+		for (i = 0, j = 0; i < sgl->cur; i++) {
+			size_t plen = min_t(size_t, used, sg[i].length);
+			struct page *page = sg_page(sg + i);
+
+			if (!page)
+				continue;
+
+			/*
+			 * Assumption: caller created af_alg_count_tsgl(len)
+			 * SG entries in dst.
+			 */
+			if (dst) {
+				if (dst_offset >= plen) {
+					/* discard page before offset */
+					dst_offset -= plen;
+					put_page(page);
+				} else {
+					/* reassign page to dst after offset */
+					sg_set_page(dst + j, page,
+						    plen - dst_offset,
+						    sg[i].offset + dst_offset);
+					dst_offset = 0;
+					j++;
+				}
+			}
+
+			sg[i].length -= plen;
+			sg[i].offset += plen;
+
+			used -= plen;
+			ctx->used -= plen;
+
+			if (sg[i].length)
+				return;
+
+			if (!dst)
+				put_page(page);
+
+			sg_assign_page(sg + i, NULL);
+		}
+
+		list_del(&sgl->list);
+		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
+						     (MAX_SGL_ENTS + 1));
+	}
+
+	if (!ctx->used)
+		ctx->merge = 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_pull_tsgl);
+
+/**
+ * af_alg_free_areq_sgls - Release TX and RX SGLs of the request
+ *
+ * @areq Request holding the TX and RX SGL
+ */
+void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
+{
+	struct sock *sk = areq->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_rsgl *rsgl, *tmp;
+	struct scatterlist *tsgl;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
+		ctx->rcvused -= rsgl->sg_num_bytes;
+		af_alg_free_sg(&rsgl->sgl);
+		list_del(&rsgl->list);
+		if (rsgl != &areq->first_rsgl)
+			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+	}
+
+	tsgl = areq->tsgl;
+	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+		if (!sg_page(sg))
+			continue;
+		put_page(sg_page(sg));
+	}
+
+	if (areq->tsgl && areq->tsgl_entries)
+		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+}
+EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
+
+/**
+ * af_alg_wait_for_wmem - wait for availability of writable memory
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @return 0 when writable memory is available, < 0 upon error
+ */
+int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int err = -ERESTARTSYS;
+	long timeout;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, af_alg_writable(sk), &wait)) {
+			err = 0;
+			break;
+		}
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_wmem);
+
+/**
+ * af_alg_wmem_wakeup - wakeup caller when writable memory is available
+ *
+ * @sk socket of connection to user space
+ */
+void af_alg_wmem_wakeup(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	if (!af_alg_writable(sk))
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
+
+/**
+ * af_alg_wait_for_data - wait for availability of TX data
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @return 0 when writable memory is available, < 0 upon error
+ */
+int af_alg_wait_for_data(struct sock *sk, unsigned flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	long timeout;
+	int err = -ERESTARTSYS;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, (ctx->used || !ctx->more),
+				  &wait)) {
+			err = 0;
+			break;
+		}
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_data);
+
+/**
+ * af_alg_data_wakeup - wakeup caller when new data can be sent to kernel
+ *
+ * @sk socket of connection to user space
+ */
+
+void af_alg_data_wakeup(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct socket_wq *wq;
+
+	if (!ctx->used)
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_data_wakeup);
+
+/**
+ * af_alg_sendmsg - implementation of sendmsg system call handler
+ *
+ * The sendmsg system call handler obtains the user data and stores it
+ * in ctx->tsgl_list. This implies allocation of the required numbers of
+ * struct af_alg_tsgl.
+ *
+ * In addition, the ctx is filled with the information sent via CMSG.
+ *
+ * @sock socket of connection to user space
+ * @msg message from user space
+ * @size size of message from user space
+ * @ivsize the size of the IV for the cipher operation to verify that the
+ *	   user-space-provided IV has the right size
+ * @return the number of copied data upon success, < 0 upon error
+ */
+int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		   unsigned int ivsize)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct af_alg_control con = {};
+	long copied = 0;
+	bool enc = 0;
+	bool init = 0;
+	int err = 0;
+
+	if (msg->msg_controllen) {
+		err = af_alg_cmsg_send(msg, &con);
+		if (err)
+			return err;
+
+		init = 1;
+		switch (con.op) {
+		case ALG_OP_ENCRYPT:
+			enc = 1;
+			break;
+		case ALG_OP_DECRYPT:
+			enc = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (con.iv && con.iv->ivlen != ivsize)
+			return -EINVAL;
+	}
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (init) {
+		ctx->enc = enc;
+		if (con.iv)
+			memcpy(ctx->iv, con.iv->iv, ivsize);
+
+		ctx->aead_assoclen = con.aead_assoclen;
+	}
+
+	while (size) {
+		struct scatterlist *sg;
+		size_t len = size;
+		size_t plen;
+
+		/* use the existing memory in an allocated page */
+		if (ctx->merge) {
+			sgl = list_entry(ctx->tsgl_list.prev,
+					 struct af_alg_tsgl, list);
+			sg = sgl->sg + sgl->cur - 1;
+			len = min_t(size_t, len,
+				    PAGE_SIZE - sg->offset - sg->length);
+
+			err = memcpy_from_msg(page_address(sg_page(sg)) +
+					      sg->offset + sg->length,
+					      msg, len);
+			if (err)
+				goto unlock;
+
+			sg->length += len;
+			ctx->merge = (sg->offset + sg->length) &
+				     (PAGE_SIZE - 1);
+
+			ctx->used += len;
+			copied += len;
+			size -= len;
+			continue;
+		}
+
+		if (!af_alg_writable(sk)) {
+			err = af_alg_wait_for_wmem(sk, msg->msg_flags);
+			if (err)
+				goto unlock;
+		}
+
+		/* allocate a new page */
+		len = min_t(unsigned long, len, af_alg_sndbuf(sk));
+
+		err = af_alg_alloc_tsgl(sk);
+		if (err)
+			goto unlock;
+
+		sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl,
+				 list);
+		sg = sgl->sg;
+		if (sgl->cur)
+			sg_unmark_end(sg + sgl->cur - 1);
+
+		do {
+			unsigned int i = sgl->cur;
+
+			plen = min_t(size_t, len, PAGE_SIZE);
+
+			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
+			if (!sg_page(sg + i)) {
+				err = -ENOMEM;
+				goto unlock;
+			}
+
+			err = memcpy_from_msg(page_address(sg_page(sg + i)),
+					      msg, plen);
+			if (err) {
+				__free_page(sg_page(sg + i));
+				sg_assign_page(sg + i, NULL);
+				goto unlock;
+			}
+
+			sg[i].length = plen;
+			len -= plen;
+			ctx->used += plen;
+			copied += plen;
+			size -= plen;
+			sgl->cur++;
+		} while (len && sgl->cur < MAX_SGL_ENTS);
+
+		if (!size)
+			sg_mark_end(sg + sgl->cur - 1);
+
+		ctx->merge = plen & (PAGE_SIZE - 1);
+	}
+
+	err = 0;
+
+	ctx->more = msg->msg_flags & MSG_MORE;
+
+unlock:
+	af_alg_data_wakeup(sk);
+	release_sock(sk);
+
+	return copied ?: err;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendmsg);
+
+/**
+ * af_alg_sendpage - sendpage system call handler
+ *
+ * This is a generic implementation of sendpage to fill ctx->tsgl_list.
+ */
+ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
+			int offset, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	int err = -EINVAL;
+
+	if (flags & MSG_SENDPAGE_NOTLAST)
+		flags |= MSG_MORE;
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used)
+		goto unlock;
+
+	if (!size)
+		goto done;
+
+	if (!af_alg_writable(sk)) {
+		err = af_alg_wait_for_wmem(sk, flags);
+		if (err)
+			goto unlock;
+	}
+
+	err = af_alg_alloc_tsgl(sk);
+	if (err)
+		goto unlock;
+
+	ctx->merge = 0;
+	sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+
+	if (sgl->cur)
+		sg_unmark_end(sgl->sg + sgl->cur - 1);
+
+	sg_mark_end(sgl->sg + sgl->cur);
+
+	get_page(page);
+	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+	sgl->cur++;
+	ctx->used += size;
+
+done:
+	ctx->more = flags & MSG_MORE;
+
+unlock:
+	af_alg_data_wakeup(sk);
+	release_sock(sk);
+
+	return err ?: size;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendpage);
+
+/**
+ * af_alg_async_cb - AIO callback handler
+ *
+ * This handler cleans up the struct af_alg_async_req upon completion of the
+ * AIO operation.
+ *
+ * The number of bytes to be generated with the AIO operation must be set
+ * in areq->outlen before the AIO callback handler is invoked.
+ */
+void af_alg_async_cb(struct crypto_async_request *_req, int err)
+{
+	struct af_alg_async_req *areq = _req->data;
+	struct sock *sk = areq->sk;
+	struct kiocb *iocb = areq->iocb;
+	unsigned int resultlen;
+
+	lock_sock(sk);
+
+	/* Buffer size written by crypto operation. */
+	resultlen = areq->outlen;
+
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+	__sock_put(sk);
+
+	iocb->ki_complete(iocb, err ? err : resultlen, 0);
+
+	release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_async_cb);
+
+/**
+ * af_alg_poll - poll system call handler
+ */
+unsigned int af_alg_poll(struct file *file, struct socket *sock,
+			 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	unsigned int mask;
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (!ctx->more || ctx->used)
+		mask |= POLLIN | POLLRDNORM;
+
+	if (af_alg_writable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+EXPORT_SYMBOL_GPL(af_alg_poll);
+
+/**
+ * af_alg_alloc_areq - allocate struct af_alg_async_req
+ *
+ * @sk socket of connection to user space
+ * @areqlen size of struct af_alg_async_req + crypto_*_reqsize
+ * @return allocated data structure or ERR_PTR upon error
+ */
+struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
+					   unsigned int areqlen)
+{
+	struct af_alg_async_req *areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
+
+	if (unlikely(!areq))
+		return ERR_PTR(-ENOMEM);
+
+	areq->areqlen = areqlen;
+	areq->sk = sk;
+	areq->last_rsgl = NULL;
+	INIT_LIST_HEAD(&areq->rsgl_list);
+	areq->tsgl = NULL;
+	areq->tsgl_entries = 0;
+
+	return areq;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_areq);
+
+/**
+ * af_alg_get_rsgl - create the RX SGL for the output data from the crypto
+ *		     operation
+ *
+ * @sk socket of connection to user space
+ * @msg user space message
+ * @flags flags used to invoke recvmsg with
+ * @areq instance of the cryptographic request that will hold the RX SGL
+ * @maxsize maximum number of bytes to be pulled from user space
+ * @outlen number of bytes in the RX SGL
+ * @return 0 on success, < 0 upon error
+ */
+int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
+		    struct af_alg_async_req *areq, size_t maxsize,
+		    size_t *outlen)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	size_t len = 0;
+
+	while (maxsize > len && msg_data_left(msg)) {
+		struct af_alg_rsgl *rsgl;
+		size_t seglen;
+		int err;
+
+		/* limit the amount of readable buffers */
+		if (!af_alg_readable(sk))
+			break;
+
+		if (!ctx->used) {
+			err = af_alg_wait_for_data(sk, flags);
+			if (err)
+				return err;
+		}
+
+		seglen = min_t(size_t, (maxsize - len),
+			       msg_data_left(msg));
+
+		if (list_empty(&areq->rsgl_list)) {
+			rsgl = &areq->first_rsgl;
+		} else {
+			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
+			if (unlikely(!rsgl))
+				return -ENOMEM;
+		}
+
+		rsgl->sgl.npages = 0;
+		list_add_tail(&rsgl->list, &areq->rsgl_list);
+
+		/* make one iovec available as scatterlist */
+		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+		if (err < 0)
+			return err;
+
+		/* chain the new scatterlist with previous one */
+		if (areq->last_rsgl)
+			af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
+
+		areq->last_rsgl = rsgl;
+		len += err;
+		ctx->rcvused += err;
+		rsgl->sg_num_bytes = err;
+		iov_iter_advance(&msg->msg_iter, err);
+	}
+
+	*outlen = len;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_get_rsgl);
+
 static int __init af_alg_init(void)
 {
 	int err = proto_register(&alg_proto, 0);
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 1f0696dd64f41..48d46e74ed0d3 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -35,101 +35,23 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct aead_tsgl {
-	struct list_head list;
-	unsigned int cur;		/* Last processed SG entry */
-	struct scatterlist sg[0];	/* Array of SGs forming the SGL */
-};
-
-struct aead_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-	size_t sg_num_bytes;		/* Bytes of data in that SGL */
-};
-
-struct aead_async_req {
-	struct kiocb *iocb;
-	struct sock *sk;
-
-	struct aead_rsgl first_rsgl;	/* First RX SG */
-	struct list_head rsgl_list;	/* Track RX SGs */
-
-	struct scatterlist *tsgl;	/* priv. TX SGL of buffers to process */
-	unsigned int tsgl_entries;	/* number of entries in priv. TX SGL */
-
-	unsigned int outlen;		/* Filled output buf length */
-
-	unsigned int areqlen;		/* Length of this data struct */
-	struct aead_request aead_req;	/* req ctx trails this struct */
-};
-
 struct aead_tfm {
 	struct crypto_aead *aead;
 	bool has_key;
 	struct crypto_skcipher *null_tfm;
 };
 
-struct aead_ctx {
-	struct list_head tsgl_list;	/* Link to TX SGL */
-
-	void *iv;
-	size_t aead_assoclen;
-
-	struct af_alg_completion completion;	/* sync work queue */
-
-	size_t used;		/* TX bytes sent to kernel */
-	size_t rcvused;		/* total RX bytes to be processed by kernel */
-
-	bool more;		/* More data to be expected? */
-	bool merge;		/* Merge new data into existing SG */
-	bool enc;		/* Crypto operation: enc, dec */
-
-	unsigned int len;	/* Length of allocated memory for this struct */
-};
-
-#define MAX_SGL_ENTS ((4096 - sizeof(struct aead_tsgl)) / \
-		      sizeof(struct scatterlist) - 1)
-
-static inline int aead_sndbuf(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
-}
-
-static inline bool aead_writable(struct sock *sk)
-{
-	return PAGE_SIZE <= aead_sndbuf(sk);
-}
-
-static inline int aead_rcvbuf(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->rcvused, 0);
-}
-
-static inline bool aead_readable(struct sock *sk)
-{
-	return PAGE_SIZE <= aead_rcvbuf(sk);
-}
-
 static inline bool aead_sufficient_data(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct aead_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
 	unsigned int as = crypto_aead_authsize(tfm);
@@ -141,490 +63,17 @@ static inline bool aead_sufficient_data(struct sock *sk)
 	return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as);
 }
 
-static int aead_alloc_tsgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_tsgl *sgl;
-	struct scatterlist *sg = NULL;
-
-	sgl = list_entry(ctx->tsgl_list.prev, struct aead_tsgl, list);
-	if (!list_empty(&ctx->tsgl_list))
-		sg = sgl->sg;
-
-	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
-		sgl = sock_kmalloc(sk, sizeof(*sgl) +
-				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
-				   GFP_KERNEL);
-		if (!sgl)
-			return -ENOMEM;
-
-		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
-		sgl->cur = 0;
-
-		if (sg)
-			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
-
-		list_add_tail(&sgl->list, &ctx->tsgl_list);
-	}
-
-	return 0;
-}
-
-/**
- * Count number of SG entries from the beginning of the SGL to @bytes. If
- * an offset is provided, the counting of the SG entries starts at the offset.
- */
-static unsigned int aead_count_tsgl(struct sock *sk, size_t bytes,
-				    size_t offset)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_tsgl *sgl, *tmp;
-	unsigned int i;
-	unsigned int sgl_count = 0;
-
-	if (!bytes)
-		return 0;
-
-	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
-		struct scatterlist *sg = sgl->sg;
-
-		for (i = 0; i < sgl->cur; i++) {
-			size_t bytes_count;
-
-			/* Skip offset */
-			if (offset >= sg[i].length) {
-				offset -= sg[i].length;
-				bytes -= sg[i].length;
-				continue;
-			}
-
-			bytes_count = sg[i].length - offset;
-
-			offset = 0;
-			sgl_count++;
-
-			/* If we have seen requested number of bytes, stop */
-			if (bytes_count >= bytes)
-				return sgl_count;
-
-			bytes -= bytes_count;
-		}
-	}
-
-	return sgl_count;
-}
-
-/**
- * Release the specified buffers from TX SGL pointed to by ctx->tsgl_list for
- * @used bytes.
- *
- * If @dst is non-null, reassign the pages to dst. The caller must release
- * the pages. If @dst_offset is given only reassign the pages to @dst starting
- * at the @dst_offset (byte). The caller must ensure that @dst is large
- * enough (e.g. by using aead_count_tsgl with the same offset).
- */
-static void aead_pull_tsgl(struct sock *sk, size_t used,
-			   struct scatterlist *dst, size_t dst_offset)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_tsgl *sgl;
-	struct scatterlist *sg;
-	unsigned int i, j;
-
-	while (!list_empty(&ctx->tsgl_list)) {
-		sgl = list_first_entry(&ctx->tsgl_list, struct aead_tsgl,
-				       list);
-		sg = sgl->sg;
-
-		for (i = 0, j = 0; i < sgl->cur; i++) {
-			size_t plen = min_t(size_t, used, sg[i].length);
-			struct page *page = sg_page(sg + i);
-
-			if (!page)
-				continue;
-
-			/*
-			 * Assumption: caller created aead_count_tsgl(len)
-			 * SG entries in dst.
-			 */
-			if (dst) {
-				if (dst_offset >= plen) {
-					/* discard page before offset */
-					dst_offset -= plen;
-					put_page(page);
-				} else {
-					/* reassign page to dst after offset */
-					sg_set_page(dst + j, page,
-						    plen - dst_offset,
-						    sg[i].offset + dst_offset);
-					dst_offset = 0;
-					j++;
-				}
-			}
-
-			sg[i].length -= plen;
-			sg[i].offset += plen;
-
-			used -= plen;
-			ctx->used -= plen;
-
-			if (sg[i].length)
-				return;
-
-			if (!dst)
-				put_page(page);
-
-			sg_assign_page(sg + i, NULL);
-		}
-
-		list_del(&sgl->list);
-		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
-						     (MAX_SGL_ENTS + 1));
-	}
-
-	if (!ctx->used)
-		ctx->merge = 0;
-}
-
-static void aead_free_areq_sgls(struct aead_async_req *areq)
-{
-	struct sock *sk = areq->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_rsgl *rsgl, *tmp;
-	struct scatterlist *tsgl;
-	struct scatterlist *sg;
-	unsigned int i;
-
-	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
-		ctx->rcvused -= rsgl->sg_num_bytes;
-		af_alg_free_sg(&rsgl->sgl);
-		list_del(&rsgl->list);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
-
-	tsgl = areq->tsgl;
-	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
-		if (!sg_page(sg))
-			continue;
-		put_page(sg_page(sg));
-	}
-
-	if (areq->tsgl && areq->tsgl_entries)
-		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
-}
-
-static int aead_wait_for_wmem(struct sock *sk, unsigned int flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int err = -ERESTARTSYS;
-	long timeout;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, aead_writable(sk), &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	return err;
-}
-
-static void aead_wmem_wakeup(struct sock *sk)
-{
-	struct socket_wq *wq;
-
-	if (!aead_writable(sk))
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	rcu_read_unlock();
-}
-
-static int aead_wait_for_data(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	long timeout;
-	int err = -ERESTARTSYS;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	return err;
-}
-
-static void aead_data_wakeup(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct socket_wq *wq;
-
-	if (!ctx->used)
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	rcu_read_unlock();
-}
-
 static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct aead_ctx *ctx = ask->private;
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
 	unsigned int ivsize = crypto_aead_ivsize(tfm);
-	struct aead_tsgl *sgl;
-	struct af_alg_control con = {};
-	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
-	int err = 0;
-
-	if (msg->msg_controllen) {
-		err = af_alg_cmsg_send(msg, &con);
-		if (err)
-			return err;
-
-		init = 1;
-		switch (con.op) {
-		case ALG_OP_ENCRYPT:
-			enc = 1;
-			break;
-		case ALG_OP_DECRYPT:
-			enc = 0;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (con.iv && con.iv->ivlen != ivsize)
-			return -EINVAL;
-	}
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	if (init) {
-		ctx->enc = enc;
-		if (con.iv)
-			memcpy(ctx->iv, con.iv->iv, ivsize);
-
-		ctx->aead_assoclen = con.aead_assoclen;
-	}
-
-	while (size) {
-		struct scatterlist *sg;
-		size_t len = size;
-		size_t plen;
-
-		/* use the existing memory in an allocated page */
-		if (ctx->merge) {
-			sgl = list_entry(ctx->tsgl_list.prev,
-					 struct aead_tsgl, list);
-			sg = sgl->sg + sgl->cur - 1;
-			len = min_t(unsigned long, len,
-				    PAGE_SIZE - sg->offset - sg->length);
-			err = memcpy_from_msg(page_address(sg_page(sg)) +
-					      sg->offset + sg->length,
-					      msg, len);
-			if (err)
-				goto unlock;
-
-			sg->length += len;
-			ctx->merge = (sg->offset + sg->length) &
-				     (PAGE_SIZE - 1);
-
-			ctx->used += len;
-			copied += len;
-			size -= len;
-			continue;
-		}
-
-		if (!aead_writable(sk)) {
-			err = aead_wait_for_wmem(sk, msg->msg_flags);
-			if (err)
-				goto unlock;
-		}
-
-		/* allocate a new page */
-		len = min_t(unsigned long, size, aead_sndbuf(sk));
-
-		err = aead_alloc_tsgl(sk);
-		if (err)
-			goto unlock;
-
-		sgl = list_entry(ctx->tsgl_list.prev, struct aead_tsgl,
-				 list);
-		sg = sgl->sg;
-		if (sgl->cur)
-			sg_unmark_end(sg + sgl->cur - 1);
-
-		do {
-			unsigned int i = sgl->cur;
-
-			plen = min_t(size_t, len, PAGE_SIZE);
-
-			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
-			if (!sg_page(sg + i)) {
-				err = -ENOMEM;
-				goto unlock;
-			}
-
-			err = memcpy_from_msg(page_address(sg_page(sg + i)),
-					      msg, plen);
-			if (err) {
-				__free_page(sg_page(sg + i));
-				sg_assign_page(sg + i, NULL);
-				goto unlock;
-			}
-
-			sg[i].length = plen;
-			len -= plen;
-			ctx->used += plen;
-			copied += plen;
-			size -= plen;
-			sgl->cur++;
-		} while (len && sgl->cur < MAX_SGL_ENTS);
-
-		if (!size)
-			sg_mark_end(sg + sgl->cur - 1);
-
-		ctx->merge = plen & (PAGE_SIZE - 1);
-	}
-
-	err = 0;
-
-	ctx->more = msg->msg_flags & MSG_MORE;
-
-unlock:
-	aead_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: copied;
-}
-
-static ssize_t aead_sendpage(struct socket *sock, struct page *page,
-			     int offset, size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_tsgl *sgl;
-	int err = -EINVAL;
-
-	if (flags & MSG_SENDPAGE_NOTLAST)
-		flags |= MSG_MORE;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (!size)
-		goto done;
-
-	if (!aead_writable(sk)) {
-		err = aead_wait_for_wmem(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	err = aead_alloc_tsgl(sk);
-	if (err)
-		goto unlock;
-
-	ctx->merge = 0;
-	sgl = list_entry(ctx->tsgl_list.prev, struct aead_tsgl, list);
-
-	if (sgl->cur)
-		sg_unmark_end(sgl->sg + sgl->cur - 1);
-
-	sg_mark_end(sgl->sg + sgl->cur);
-
-	get_page(page);
-	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-	sgl->cur++;
-	ctx->used += size;
-
-	err = 0;
 
-done:
-	ctx->more = flags & MSG_MORE;
-unlock:
-	aead_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: size;
-}
-
-static void aead_async_cb(struct crypto_async_request *_req, int err)
-{
-	struct aead_async_req *areq = _req->data;
-	struct sock *sk = areq->sk;
-	struct kiocb *iocb = areq->iocb;
-	unsigned int resultlen;
-
-	lock_sock(sk);
-
-	/* Buffer size written by crypto operation. */
-	resultlen = areq->outlen;
-
-	aead_free_areq_sgls(areq);
-	sock_kfree_s(sk, areq, areq->areqlen);
-	__sock_put(sk);
-
-	iocb->ki_complete(iocb, err ? err : resultlen, 0);
-
-	release_sock(sk);
+	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
 static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
@@ -648,16 +97,13 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct aead_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
 	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
 	unsigned int as = crypto_aead_authsize(tfm);
-	unsigned int areqlen =
-		sizeof(struct aead_async_req) + crypto_aead_reqsize(tfm);
-	struct aead_async_req *areq;
-	struct aead_rsgl *last_rsgl = NULL;
-	struct aead_tsgl *tsgl;
+	struct af_alg_async_req *areq;
+	struct af_alg_tsgl *tsgl;
 	struct scatterlist *src;
 	int err = 0;
 	size_t used = 0;		/* [in]  TX bufs to be en/decrypted */
@@ -703,61 +149,15 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	used -= ctx->aead_assoclen;
 
 	/* Allocate cipher request for current operation. */
-	areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
-	if (unlikely(!areq))
-		return -ENOMEM;
-	areq->areqlen = areqlen;
-	areq->sk = sk;
-	INIT_LIST_HEAD(&areq->rsgl_list);
-	areq->tsgl = NULL;
-	areq->tsgl_entries = 0;
+	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+				     crypto_aead_reqsize(tfm));
+	if (IS_ERR(areq))
+		return PTR_ERR(areq);
 
 	/* convert iovecs of output buffers into RX SGL */
-	while (outlen > usedpages && msg_data_left(msg)) {
-		struct aead_rsgl *rsgl;
-		size_t seglen;
-
-		/* limit the amount of readable buffers */
-		if (!aead_readable(sk))
-			break;
-
-		if (!ctx->used) {
-			err = aead_wait_for_data(sk, flags);
-			if (err)
-				goto free;
-		}
-
-		seglen = min_t(size_t, (outlen - usedpages),
-			       msg_data_left(msg));
-
-		if (list_empty(&areq->rsgl_list)) {
-			rsgl = &areq->first_rsgl;
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (unlikely(!rsgl)) {
-				err = -ENOMEM;
-				goto free;
-			}
-		}
-
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &areq->rsgl_list);
-
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto free;
-
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
-		usedpages += err;
-		ctx->rcvused += err;
-		rsgl->sg_num_bytes = err;
-		iov_iter_advance(&msg->msg_iter, err);
-	}
+	err = af_alg_get_rsgl(sk, msg, flags, areq, outlen, &usedpages);
+	if (err)
+		goto free;
 
 	/*
 	 * Ensure output buffer is sufficiently large. If the caller provides
@@ -778,7 +178,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 	processed = used + ctx->aead_assoclen;
-	tsgl = list_first_entry(&ctx->tsgl_list, struct aead_tsgl, list);
+	tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list);
 
 	/*
 	 * Copy of AAD from source to destination
@@ -811,7 +211,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 					   areq->first_rsgl.sgl.sg, processed);
 		if (err)
 			goto free;
-		aead_pull_tsgl(sk, processed, NULL, 0);
+		af_alg_pull_tsgl(sk, processed, NULL, 0);
 	} else {
 		/*
 		 * Decryption operation - To achieve an in-place cipher
@@ -831,8 +231,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 			goto free;
 
 		/* Create TX SGL for tag and chain it to RX SGL. */
-		areq->tsgl_entries = aead_count_tsgl(sk, processed,
-						     processed - as);
+		areq->tsgl_entries = af_alg_count_tsgl(sk, processed,
+						       processed - as);
 		if (!areq->tsgl_entries)
 			areq->tsgl_entries = 1;
 		areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) *
@@ -845,12 +245,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		sg_init_table(areq->tsgl, areq->tsgl_entries);
 
 		/* Release TX SGL, except for tag data and reassign tag data. */
-		aead_pull_tsgl(sk, processed, areq->tsgl, processed - as);
+		af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as);
 
 		/* chain the areq TX SGL holding the tag with RX SGL */
-		if (last_rsgl) {
+		if (usedpages) {
 			/* RX SGL present */
-			struct af_alg_sgl *sgl_prev = &last_rsgl->sgl;
+			struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
 
 			sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
 			sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
@@ -861,28 +261,28 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 	/* Initialize the crypto operation */
-	aead_request_set_crypt(&areq->aead_req, src,
+	aead_request_set_crypt(&areq->cra_u.aead_req, src,
 			       areq->first_rsgl.sgl.sg, used, ctx->iv);
-	aead_request_set_ad(&areq->aead_req, ctx->aead_assoclen);
-	aead_request_set_tfm(&areq->aead_req, tfm);
+	aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
+	aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
 
 	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
 		/* AIO operation */
 		areq->iocb = msg->msg_iocb;
-		aead_request_set_callback(&areq->aead_req,
+		aead_request_set_callback(&areq->cra_u.aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
-					  aead_async_cb, areq);
-		err = ctx->enc ? crypto_aead_encrypt(&areq->aead_req) :
-				 crypto_aead_decrypt(&areq->aead_req);
+					  af_alg_async_cb, areq);
+		err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
+				 crypto_aead_decrypt(&areq->cra_u.aead_req);
 	} else {
 		/* Synchronous operation */
-		aead_request_set_callback(&areq->aead_req,
+		aead_request_set_callback(&areq->cra_u.aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
 					  af_alg_complete, &ctx->completion);
 		err = af_alg_wait_for_completion(ctx->enc ?
-					 crypto_aead_encrypt(&areq->aead_req) :
-					 crypto_aead_decrypt(&areq->aead_req),
-					 &ctx->completion);
+				crypto_aead_encrypt(&areq->cra_u.aead_req) :
+				crypto_aead_decrypt(&areq->cra_u.aead_req),
+						 &ctx->completion);
 	}
 
 	/* AIO operation in progress */
@@ -896,9 +296,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 free:
-	aead_free_areq_sgls(areq);
-	if (areq)
-		sock_kfree_s(sk, areq, areqlen);
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
 
 	return err ? err : outlen;
 }
@@ -931,31 +330,11 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 out:
-	aead_wmem_wakeup(sk);
+	af_alg_wmem_wakeup(sk);
 	release_sock(sk);
 	return ret;
 }
 
-static unsigned int aead_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned int mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	if (!ctx->more)
-		mask |= POLLIN | POLLRDNORM;
-
-	if (aead_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-
-	return mask;
-}
-
 static struct proto_ops algif_aead_ops = {
 	.family		=	PF_ALG,
 
@@ -973,9 +352,9 @@ static struct proto_ops algif_aead_ops = {
 
 	.release	=	af_alg_release,
 	.sendmsg	=	aead_sendmsg,
-	.sendpage	=	aead_sendpage,
+	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	aead_recvmsg,
-	.poll		=	aead_poll,
+	.poll		=	af_alg_poll,
 };
 
 static int aead_check_key(struct socket *sock)
@@ -1037,7 +416,7 @@ static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
 	if (err)
 		return err;
 
-	return aead_sendpage(sock, page, offset, size, flags);
+	return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -1071,7 +450,7 @@ static struct proto_ops algif_aead_ops_nokey = {
 	.sendmsg	=	aead_sendmsg_nokey,
 	.sendpage	=	aead_sendpage_nokey,
 	.recvmsg	=	aead_recvmsg_nokey,
-	.poll		=	aead_poll,
+	.poll		=	af_alg_poll,
 };
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
@@ -1132,14 +511,14 @@ static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
 static void aead_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
 	unsigned int ivlen = crypto_aead_ivsize(tfm);
 
-	aead_pull_tsgl(sk, ctx->used, NULL, 0);
+	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
 	crypto_put_default_null_skcipher2();
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
@@ -1148,7 +527,7 @@ static void aead_sock_destruct(struct sock *sk)
 
 static int aead_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct aead_ctx *ctx;
+	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_tfm *tfm = private;
 	struct crypto_aead *aead = tfm->aead;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index ce3b5fba22799..8ae4170aaeb4f 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -33,320 +33,16 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct skcipher_tsgl {
-	struct list_head list;
-	int cur;
-	struct scatterlist sg[0];
-};
-
-struct skcipher_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-	size_t sg_num_bytes;
-};
-
-struct skcipher_async_req {
-	struct kiocb *iocb;
-	struct sock *sk;
-
-	struct skcipher_rsgl first_sgl;
-	struct list_head rsgl_list;
-
-	struct scatterlist *tsgl;
-	unsigned int tsgl_entries;
-
-	unsigned int areqlen;
-	struct skcipher_request req;
-};
-
 struct skcipher_tfm {
 	struct crypto_skcipher *skcipher;
 	bool has_key;
 };
 
-struct skcipher_ctx {
-	struct list_head tsgl_list;
-
-	void *iv;
-
-	struct af_alg_completion completion;
-
-	size_t used;
-	size_t rcvused;
-
-	bool more;
-	bool merge;
-	bool enc;
-
-	unsigned int len;
-};
-
-#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_tsgl)) / \
-		      sizeof(struct scatterlist) - 1)
-
-static inline int skcipher_sndbuf(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
-}
-
-static inline bool skcipher_writable(struct sock *sk)
-{
-	return PAGE_SIZE <= skcipher_sndbuf(sk);
-}
-
-static inline int skcipher_rcvbuf(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->rcvused, 0);
-}
-
-static inline bool skcipher_readable(struct sock *sk)
-{
-	return PAGE_SIZE <= skcipher_rcvbuf(sk);
-}
-
-static int skcipher_alloc_tsgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tsgl *sgl;
-	struct scatterlist *sg = NULL;
-
-	sgl = list_entry(ctx->tsgl_list.prev, struct skcipher_tsgl, list);
-	if (!list_empty(&ctx->tsgl_list))
-		sg = sgl->sg;
-
-	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
-		sgl = sock_kmalloc(sk, sizeof(*sgl) +
-				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
-				   GFP_KERNEL);
-		if (!sgl)
-			return -ENOMEM;
-
-		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
-		sgl->cur = 0;
-
-		if (sg)
-			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
-
-		list_add_tail(&sgl->list, &ctx->tsgl_list);
-	}
-
-	return 0;
-}
-
-static unsigned int skcipher_count_tsgl(struct sock *sk, size_t bytes)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tsgl *sgl, *tmp;
-	unsigned int i;
-	unsigned int sgl_count = 0;
-
-	if (!bytes)
-		return 0;
-
-	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
-		struct scatterlist *sg = sgl->sg;
-
-		for (i = 0; i < sgl->cur; i++) {
-			sgl_count++;
-			if (sg[i].length >= bytes)
-				return sgl_count;
-
-			bytes -= sg[i].length;
-		}
-	}
-
-	return sgl_count;
-}
-
-static void skcipher_pull_tsgl(struct sock *sk, size_t used,
-			       struct scatterlist *dst)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tsgl *sgl;
-	struct scatterlist *sg;
-	unsigned int i;
-
-	while (!list_empty(&ctx->tsgl_list)) {
-		sgl = list_first_entry(&ctx->tsgl_list, struct skcipher_tsgl,
-				       list);
-		sg = sgl->sg;
-
-		for (i = 0; i < sgl->cur; i++) {
-			size_t plen = min_t(size_t, used, sg[i].length);
-			struct page *page = sg_page(sg + i);
-
-			if (!page)
-				continue;
-
-			/*
-			 * Assumption: caller created skcipher_count_tsgl(len)
-			 * SG entries in dst.
-			 */
-			if (dst)
-				sg_set_page(dst + i, page, plen, sg[i].offset);
-
-			sg[i].length -= plen;
-			sg[i].offset += plen;
-
-			used -= plen;
-			ctx->used -= plen;
-
-			if (sg[i].length)
-				return;
-
-			if (!dst)
-				put_page(page);
-			sg_assign_page(sg + i, NULL);
-		}
-
-		list_del(&sgl->list);
-		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
-						     (MAX_SGL_ENTS + 1));
-	}
-
-	if (!ctx->used)
-		ctx->merge = 0;
-}
-
-static void skcipher_free_areq_sgls(struct skcipher_async_req *areq)
-{
-	struct sock *sk = areq->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_rsgl *rsgl, *tmp;
-	struct scatterlist *tsgl;
-	struct scatterlist *sg;
-	unsigned int i;
-
-	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
-		ctx->rcvused -= rsgl->sg_num_bytes;
-		af_alg_free_sg(&rsgl->sgl);
-		list_del(&rsgl->list);
-		if (rsgl != &areq->first_sgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
-
-	tsgl = areq->tsgl;
-	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
-		if (!sg_page(sg))
-			continue;
-		put_page(sg_page(sg));
-	}
-
-	if (areq->tsgl && areq->tsgl_entries)
-		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
-}
-
-static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int err = -ERESTARTSYS;
-	long timeout;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	return err;
-}
-
-static void skcipher_wmem_wakeup(struct sock *sk)
-{
-	struct socket_wq *wq;
-
-	if (!skcipher_writable(sk))
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	rcu_read_unlock();
-}
-
-static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	long timeout;
-	int err = -ERESTARTSYS;
-
-	if (flags & MSG_DONTWAIT) {
-		return -EAGAIN;
-	}
-
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, ctx->used, &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	return err;
-}
-
-static void skcipher_data_wakeup(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct socket_wq *wq;
-
-	if (!ctx->used)
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	rcu_read_unlock();
-}
-
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t size)
 {
@@ -354,208 +50,11 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
-	struct skcipher_tsgl *sgl;
-	struct af_alg_control con = {};
-	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
-	int err;
-	int i;
-
-	if (msg->msg_controllen) {
-		err = af_alg_cmsg_send(msg, &con);
-		if (err)
-			return err;
-
-		init = 1;
-		switch (con.op) {
-		case ALG_OP_ENCRYPT:
-			enc = 1;
-			break;
-		case ALG_OP_DECRYPT:
-			enc = 0;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (con.iv && con.iv->ivlen != ivsize)
-			return -EINVAL;
-	}
-
-	err = -EINVAL;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (init) {
-		ctx->enc = enc;
-		if (con.iv)
-			memcpy(ctx->iv, con.iv->iv, ivsize);
-	}
-
-	while (size) {
-		struct scatterlist *sg;
-		unsigned long len = size;
-		size_t plen;
-
-		if (ctx->merge) {
-			sgl = list_entry(ctx->tsgl_list.prev,
-					 struct skcipher_tsgl, list);
-			sg = sgl->sg + sgl->cur - 1;
-			len = min_t(unsigned long, len,
-				    PAGE_SIZE - sg->offset - sg->length);
-
-			err = memcpy_from_msg(page_address(sg_page(sg)) +
-					      sg->offset + sg->length,
-					      msg, len);
-			if (err)
-				goto unlock;
-
-			sg->length += len;
-			ctx->merge = (sg->offset + sg->length) &
-				     (PAGE_SIZE - 1);
-
-			ctx->used += len;
-			copied += len;
-			size -= len;
-			continue;
-		}
-
-		if (!skcipher_writable(sk)) {
-			err = skcipher_wait_for_wmem(sk, msg->msg_flags);
-			if (err)
-				goto unlock;
-		}
-
-		len = min_t(unsigned long, len, skcipher_sndbuf(sk));
-
-		err = skcipher_alloc_tsgl(sk);
-		if (err)
-			goto unlock;
-
-		sgl = list_entry(ctx->tsgl_list.prev, struct skcipher_tsgl,
-				 list);
-		sg = sgl->sg;
-		if (sgl->cur)
-			sg_unmark_end(sg + sgl->cur - 1);
-		do {
-			i = sgl->cur;
-			plen = min_t(size_t, len, PAGE_SIZE);
-
-			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
-			err = -ENOMEM;
-			if (!sg_page(sg + i))
-				goto unlock;
-
-			err = memcpy_from_msg(page_address(sg_page(sg + i)),
-					      msg, plen);
-			if (err) {
-				__free_page(sg_page(sg + i));
-				sg_assign_page(sg + i, NULL);
-				goto unlock;
-			}
-
-			sg[i].length = plen;
-			len -= plen;
-			ctx->used += plen;
-			copied += plen;
-			size -= plen;
-			sgl->cur++;
-		} while (len && sgl->cur < MAX_SGL_ENTS);
-
-		if (!size)
-			sg_mark_end(sg + sgl->cur - 1);
-
-		ctx->merge = plen & (PAGE_SIZE - 1);
-	}
-
-	err = 0;
-
-	ctx->more = msg->msg_flags & MSG_MORE;
-
-unlock:
-	skcipher_data_wakeup(sk);
-	release_sock(sk);
 
-	return copied ?: err;
-}
-
-static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
-				 int offset, size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tsgl *sgl;
-	int err = -EINVAL;
-
-	if (flags & MSG_SENDPAGE_NOTLAST)
-		flags |= MSG_MORE;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (!size)
-		goto done;
-
-	if (!skcipher_writable(sk)) {
-		err = skcipher_wait_for_wmem(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	err = skcipher_alloc_tsgl(sk);
-	if (err)
-		goto unlock;
-
-	ctx->merge = 0;
-	sgl = list_entry(ctx->tsgl_list.prev, struct skcipher_tsgl, list);
-
-	if (sgl->cur)
-		sg_unmark_end(sgl->sg + sgl->cur - 1);
-
-	sg_mark_end(sgl->sg + sgl->cur);
-	get_page(page);
-	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-	sgl->cur++;
-	ctx->used += size;
-
-done:
-	ctx->more = flags & MSG_MORE;
-
-unlock:
-	skcipher_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: size;
-}
-
-static void skcipher_async_cb(struct crypto_async_request *req, int err)
-{
-	struct skcipher_async_req *areq = req->data;
-	struct sock *sk = areq->sk;
-	struct kiocb *iocb = areq->iocb;
-	unsigned int resultlen;
-
-	lock_sock(sk);
-
-	/* Buffer size written by crypto operation. */
-	resultlen = areq->req.cryptlen;
-
-	skcipher_free_areq_sgls(areq);
-	sock_kfree_s(sk, areq, areq->areqlen);
-	__sock_put(sk);
-
-	iocb->ki_complete(iocb, err ? err : resultlen, 0);
-
-	release_sock(sk);
+	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
 static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -565,72 +64,24 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	unsigned int bs = crypto_skcipher_blocksize(tfm);
-	unsigned int areqlen = sizeof(struct skcipher_async_req) +
-			       crypto_skcipher_reqsize(tfm);
-	struct skcipher_async_req *areq;
-	struct skcipher_rsgl *last_rsgl = NULL;
+	struct af_alg_async_req *areq;
 	int err = 0;
 	size_t len = 0;
 
 	/* Allocate cipher request for current operation. */
-	areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
-	if (unlikely(!areq))
-		return -ENOMEM;
-	areq->areqlen = areqlen;
-	areq->sk = sk;
-	INIT_LIST_HEAD(&areq->rsgl_list);
-	areq->tsgl = NULL;
-	areq->tsgl_entries = 0;
+	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+				     crypto_skcipher_reqsize(tfm));
+	if (IS_ERR(areq))
+		return PTR_ERR(areq);
 
 	/* convert iovecs of output buffers into RX SGL */
-	while (msg_data_left(msg)) {
-		struct skcipher_rsgl *rsgl;
-		size_t seglen;
-
-		/* limit the amount of readable buffers */
-		if (!skcipher_readable(sk))
-			break;
-
-		if (!ctx->used) {
-			err = skcipher_wait_for_data(sk, flags);
-			if (err)
-				goto free;
-		}
-
-		seglen = min_t(size_t, ctx->used, msg_data_left(msg));
-
-		if (list_empty(&areq->rsgl_list)) {
-			rsgl = &areq->first_sgl;
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (!rsgl) {
-				err = -ENOMEM;
-				goto free;
-			}
-		}
-
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &areq->rsgl_list);
-
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto free;
-
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
-		len += err;
-		ctx->rcvused += err;
-		rsgl->sg_num_bytes = err;
-		iov_iter_advance(&msg->msg_iter, err);
-	}
+	err = af_alg_get_rsgl(sk, msg, flags, areq, -1, &len);
+	if (err)
+		goto free;
 
 	/* Process only as much RX buffers for which we have TX data */
 	if (len > ctx->used)
@@ -647,7 +98,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	 * Create a per request TX SGL for this request which tracks the
 	 * SG entries from the global TX SGL.
 	 */
-	areq->tsgl_entries = skcipher_count_tsgl(sk, len);
+	areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0);
 	if (!areq->tsgl_entries)
 		areq->tsgl_entries = 1;
 	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
@@ -657,44 +108,48 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 		goto free;
 	}
 	sg_init_table(areq->tsgl, areq->tsgl_entries);
-	skcipher_pull_tsgl(sk, len, areq->tsgl);
+	af_alg_pull_tsgl(sk, len, areq->tsgl, 0);
 
 	/* Initialize the crypto operation */
-	skcipher_request_set_tfm(&areq->req, tfm);
-	skcipher_request_set_crypt(&areq->req, areq->tsgl,
-				   areq->first_sgl.sgl.sg, len, ctx->iv);
+	skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
+	skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
+				   areq->first_rsgl.sgl.sg, len, ctx->iv);
 
 	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
 		/* AIO operation */
 		areq->iocb = msg->msg_iocb;
-		skcipher_request_set_callback(&areq->req,
+		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
 					      CRYPTO_TFM_REQ_MAY_SLEEP,
-					      skcipher_async_cb, areq);
-		err = ctx->enc ? crypto_skcipher_encrypt(&areq->req) :
-				 crypto_skcipher_decrypt(&areq->req);
+					      af_alg_async_cb, areq);
+		err = ctx->enc ?
+			crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 	} else {
 		/* Synchronous operation */
-		skcipher_request_set_callback(&areq->req,
+		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
 					      CRYPTO_TFM_REQ_MAY_SLEEP |
 					      CRYPTO_TFM_REQ_MAY_BACKLOG,
 					      af_alg_complete,
 					      &ctx->completion);
 		err = af_alg_wait_for_completion(ctx->enc ?
-					crypto_skcipher_encrypt(&areq->req) :
-					crypto_skcipher_decrypt(&areq->req),
+			crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req),
 						 &ctx->completion);
 	}
 
 	/* AIO operation in progress */
 	if (err == -EINPROGRESS) {
 		sock_hold(sk);
+
+		/* Remember output size that will be generated. */
+		areq->outlen = len;
+
 		return -EIOCBQUEUED;
 	}
 
 free:
-	skcipher_free_areq_sgls(areq);
-	if (areq)
-		sock_kfree_s(sk, areq, areqlen);
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
 
 	return err ? err : len;
 }
@@ -727,30 +182,11 @@ static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 out:
-	skcipher_wmem_wakeup(sk);
+	af_alg_wmem_wakeup(sk);
 	release_sock(sk);
 	return ret;
 }
 
-static unsigned int skcipher_poll(struct file *file, struct socket *sock,
-				  poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	unsigned int mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	if (ctx->used)
-		mask |= POLLIN | POLLRDNORM;
-
-	if (skcipher_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-
-	return mask;
-}
 
 static struct proto_ops algif_skcipher_ops = {
 	.family		=	PF_ALG,
@@ -769,9 +205,9 @@ static struct proto_ops algif_skcipher_ops = {
 
 	.release	=	af_alg_release,
 	.sendmsg	=	skcipher_sendmsg,
-	.sendpage	=	skcipher_sendpage,
+	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	skcipher_recvmsg,
-	.poll		=	skcipher_poll,
+	.poll		=	af_alg_poll,
 };
 
 static int skcipher_check_key(struct socket *sock)
@@ -833,7 +269,7 @@ static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
 	if (err)
 		return err;
 
-	return skcipher_sendpage(sock, page, offset, size, flags);
+	return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -867,7 +303,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
-	.poll		=	skcipher_poll,
+	.poll		=	af_alg_poll,
 };
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
@@ -912,13 +348,13 @@ static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 static void skcipher_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 
-	skcipher_pull_tsgl(sk, ctx->used, NULL);
+	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -926,7 +362,7 @@ static void skcipher_sock_destruct(struct sock *sk)
 
 static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct skcipher_ctx *ctx;
+	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_tfm *tfm = private;
 	struct crypto_skcipher *skcipher = tfm->skcipher;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index e2b9c6fe27149..75ec9c662268b 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -20,6 +20,9 @@
 #include <linux/types.h>
 #include <net/sock.h>
 
+#include <crypto/aead.h>
+#include <crypto/skcipher.h>
+
 #define ALG_MAX_PAGES			16
 
 struct crypto_async_request;
@@ -68,6 +71,99 @@ struct af_alg_sgl {
 	unsigned int npages;
 };
 
+/* TX SGL entry */
+struct af_alg_tsgl {
+	struct list_head list;
+	unsigned int cur;		/* Last processed SG entry */
+	struct scatterlist sg[0];	/* Array of SGs forming the SGL */
+};
+
+#define MAX_SGL_ENTS ((4096 - sizeof(struct af_alg_tsgl)) / \
+		      sizeof(struct scatterlist) - 1)
+
+/* RX SGL entry */
+struct af_alg_rsgl {
+	struct af_alg_sgl sgl;
+	struct list_head list;
+	size_t sg_num_bytes;		/* Bytes of data in that SGL */
+};
+
+/**
+ * struct af_alg_async_req - definition of crypto request
+ * @iocb:		IOCB for AIO operations
+ * @sk:			Socket the request is associated with
+ * @first_rsgl:		First RX SG
+ * @last_rsgl:		Pointer to last RX SG
+ * @rsgl_list:		Track RX SGs
+ * @tsgl:		Private, per request TX SGL of buffers to process
+ * @tsgl_entries:	Number of entries in priv. TX SGL
+ * @outlen:		Number of output bytes generated by crypto op
+ * @areqlen:		Length of this data structure
+ * @cra_u:		Cipher request
+ */
+struct af_alg_async_req {
+	struct kiocb *iocb;
+	struct sock *sk;
+
+	struct af_alg_rsgl first_rsgl;
+	struct af_alg_rsgl *last_rsgl;
+	struct list_head rsgl_list;
+
+	struct scatterlist *tsgl;
+	unsigned int tsgl_entries;
+
+	unsigned int outlen;
+	unsigned int areqlen;
+
+	union {
+		struct aead_request aead_req;
+		struct skcipher_request skcipher_req;
+	} cra_u;
+
+	/* req ctx trails this struct */
+};
+
+/**
+ * struct af_alg_ctx - definition of the crypto context
+ *
+ * The crypto context tracks the input data during the lifetime of an AF_ALG
+ * socket.
+ *
+ * @tsgl_list:		Link to TX SGL
+ * @iv:			IV for cipher operation
+ * @aead_assoclen:	Length of AAD for AEAD cipher operations
+ * @completion:		Work queue for synchronous operation
+ * @used:		TX bytes sent to kernel. This variable is used to
+ *			ensure that user space cannot cause the kernel
+ *			to allocate too much memory in sendmsg operation.
+ * @rcvused:		Total RX bytes to be filled by kernel. This variable
+ *			is used to ensure user space cannot cause the kernel
+ *			to allocate too much memory in a recvmsg operation.
+ * @more:		More data to be expected from user space?
+ * @merge:		Shall new data from user space be merged into existing
+ *			SG?
+ * @enc:		Cryptographic operation to be performed when
+ *			recvmsg is invoked.
+ * @len:		Length of memory allocated for this data structure.
+ */
+struct af_alg_ctx {
+	struct list_head tsgl_list;
+
+	void *iv;
+	size_t aead_assoclen;
+
+	struct af_alg_completion completion;
+
+	size_t used;
+	size_t rcvused;
+
+	bool more;
+	bool merge;
+	bool enc;
+
+	unsigned int len;
+};
+
 int af_alg_register_type(const struct af_alg_type *type);
 int af_alg_unregister_type(const struct af_alg_type *type);
 
@@ -94,4 +190,78 @@ static inline void af_alg_init_completion(struct af_alg_completion *completion)
 	init_completion(&completion->completion);
 }
 
+/**
+ * Size of available buffer for sending data from user space to kernel.
+ *
+ * @sk socket of connection to user space
+ * @return number of bytes still available
+ */
+static inline int af_alg_sndbuf(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+
+	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
+			  ctx->used, 0);
+}
+
+/**
+ * Can the send buffer still be written to?
+ *
+ * @sk socket of connection to user space
+ * @return true => writable, false => not writable
+ */
+static inline bool af_alg_writable(struct sock *sk)
+{
+	return PAGE_SIZE <= af_alg_sndbuf(sk);
+}
+
+/**
+ * Size of available buffer used by kernel for the RX user space operation.
+ *
+ * @sk socket of connection to user space
+ * @return number of bytes still available
+ */
+static inline int af_alg_rcvbuf(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+
+	return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
+			  ctx->rcvused, 0);
+}
+
+/**
+ * Can the RX buffer still be written to?
+ *
+ * @sk socket of connection to user space
+ * @return true => writable, false => not writable
+ */
+static inline bool af_alg_readable(struct sock *sk)
+{
+	return PAGE_SIZE <= af_alg_rcvbuf(sk);
+}
+
+int af_alg_alloc_tsgl(struct sock *sk);
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset);
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
+		      size_t dst_offset);
+void af_alg_free_areq_sgls(struct af_alg_async_req *areq);
+int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags);
+void af_alg_wmem_wakeup(struct sock *sk);
+int af_alg_wait_for_data(struct sock *sk, unsigned flags);
+void af_alg_data_wakeup(struct sock *sk);
+int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		   unsigned int ivsize);
+ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
+			int offset, size_t size, int flags);
+void af_alg_async_cb(struct crypto_async_request *_req, int err);
+unsigned int af_alg_poll(struct file *file, struct socket *sock,
+			 poll_table *wait);
+struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
+					   unsigned int areqlen);
+int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
+		    struct af_alg_async_req *areq, size_t maxsize,
+		    size_t *outlen);
+
 #endif	/* _CRYPTO_IF_ALG_H */

From 1e3204104cf7f608e90bc824523aaa8129ea944a Mon Sep 17 00:00:00 2001
From: Mogens Lauridsen <mlauridsen171@gmail.com>
Date: Thu, 3 Aug 2017 15:34:12 +0200
Subject: [PATCH 102/116] crypto: sahara - Fix dma unmap direction

The direction used in dma_unmap_sg in aes calc is wrong.
This result in the cache not being invalidated correct when aes
calculation is done and result has been dma'ed to memory.
This is seen as sporadic wrong result from aes calc.

Signed-off-by: Mogens Lauridsen <mlauridsen171@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sahara.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index c2174ec88e2a8..474da36bdd2c7 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -543,10 +543,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
 
 unmap_out:
 	dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg,
-		DMA_TO_DEVICE);
+		DMA_FROM_DEVICE);
 unmap_in:
 	dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
-		DMA_FROM_DEVICE);
+		DMA_TO_DEVICE);
 
 	return -EINVAL;
 }
@@ -594,9 +594,9 @@ static int sahara_aes_process(struct ablkcipher_request *req)
 	}
 
 	dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg,
-		DMA_TO_DEVICE);
-	dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
 		DMA_FROM_DEVICE);
+	dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
+		DMA_TO_DEVICE);
 
 	return 0;
 }

From 67adc432f424063cb82dfcb56fa883c7d8f7e9e5 Mon Sep 17 00:00:00 2001
From: Mogens Lauridsen <mlauridsen171@gmail.com>
Date: Thu, 3 Aug 2017 15:46:35 +0200
Subject: [PATCH 103/116] crypto: sahara - Remove leftover from previous used
 spinlock

This driver previously used a spinlock. The spinlock is not
used any more, but the spinlock variable was still there
and also being initialized.

Signed-off-by: Mogens Lauridsen <mlauridsen171@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sahara.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 474da36bdd2c7..03a8abad7e76a 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -202,7 +202,6 @@ struct sahara_dev {
 	struct completion	dma_completion;
 
 	struct sahara_ctx	*ctx;
-	spinlock_t		lock;
 	struct crypto_queue	queue;
 	unsigned long		flags;
 
@@ -1487,7 +1486,6 @@ static int sahara_probe(struct platform_device *pdev)
 
 	crypto_init_queue(&dev->queue, SAHARA_QUEUE_LENGTH);
 
-	spin_lock_init(&dev->lock);
 	mutex_init(&dev->queue_mutex);
 
 	dev_ptr = dev;

From dea632cadd129d1901efde6b1167a6250b2157fb Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Mon, 7 Aug 2017 10:31:20 -0700
Subject: [PATCH 104/116] lib/mpi: fix build with clang

Use just @ to denote comments which works with gcc and clang.
Otherwise clang reports an escape sequence error:
  error: invalid % escape in inline assembly string

Use %0-%3 as operand references, this avoids:
  error: invalid operand in inline asm: 'umull ${1:r}, ${0:r}, ${2:r}, ${3:r}'

Also remove superfluous casts on output operands to avoid warnings
such as:
  warning: invalid use of a cast in an inline asm context requiring an l-value

Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/mpi/longlong.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index 93336502af089..57fd45ab7af1a 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -176,8 +176,8 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 	__asm__ ("adds %1, %4, %5\n" \
 		"adc  %0, %2, %3" \
-	: "=r" ((USItype)(sh)), \
-		"=&r" ((USItype)(sl)) \
+	: "=r" (sh), \
+		"=&r" (sl) \
 	: "%r" ((USItype)(ah)), \
 		"rI" ((USItype)(bh)), \
 		"%r" ((USItype)(al)), \
@@ -185,15 +185,15 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 	__asm__ ("subs %1, %4, %5\n" \
 		"sbc  %0, %2, %3" \
-	: "=r" ((USItype)(sh)), \
-		"=&r" ((USItype)(sl)) \
+	: "=r" (sh), \
+		"=&r" (sl) \
 	: "r" ((USItype)(ah)), \
 		"rI" ((USItype)(bh)), \
 		"r" ((USItype)(al)), \
 		"rI" ((USItype)(bl)))
 #if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
 #define umul_ppmm(xh, xl, a, b) \
-	__asm__ ("%@ Inlined umul_ppmm\n" \
+	__asm__ ("@ Inlined umul_ppmm\n" \
 		"mov	%|r0, %2, lsr #16		@ AAAA\n" \
 		"mov	%|r2, %3, lsr #16		@ BBBB\n" \
 		"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n" \
@@ -206,19 +206,19 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
 		"addcs	%|r2, %|r2, #65536\n" \
 		"adds	%1, %|r1, %|r0, lsl #16\n" \
 		"adc	%0, %|r2, %|r0, lsr #16" \
-	: "=&r" ((USItype)(xh)), \
-		"=r" ((USItype)(xl)) \
+	: "=&r" (xh), \
+		"=r" (xl) \
 	: "r" ((USItype)(a)), \
 		"r" ((USItype)(b)) \
 	: "r0", "r1", "r2")
 #else
 #define umul_ppmm(xh, xl, a, b) \
-	__asm__ ("%@ Inlined umul_ppmm\n" \
-		"umull %r1, %r0, %r2, %r3" \
-	: "=&r" ((USItype)(xh)), \
-			"=&r" ((USItype)(xl)) \
+	__asm__ ("@ Inlined umul_ppmm\n" \
+		"umull %1, %0, %2, %3" \
+	: "=&r" (xh), \
+		"=&r" (xl) \
 	: "r" ((USItype)(a)), \
-			"r" ((USItype)(b)) \
+		"r" ((USItype)(b)) \
 	: "r0", "r1")
 #endif
 #define UMUL_TIME 20

From ef4064bb3f6a1037cbda8d025f564aab5bd2de35 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Tue, 8 Aug 2017 21:42:47 +0800
Subject: [PATCH 105/116] crypto: ccp - use dma_mapping_error to check map
 error

The return value of dma_map_single() should be checked by
dma_mapping_error(). However, in function ccp_init_dm_workarea(), its
return value is checked against NULL, which could result in failures.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 804924e48c898..406b95329b3d2 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -168,7 +168,7 @@ static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
 
 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
 						 dir);
-		if (!wa->dma.address)
+		if (dma_mapping_error(wa->dev, wa->dma.address))
 			return -ENOMEM;
 
 		wa->dma.length = len;

From 75d11e75357c1c643f65b478214008a5bf0230cb Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Wed, 9 Aug 2017 16:20:00 +0200
Subject: [PATCH 106/116] crypto: algif_aead - fix comment regarding memory
 layout

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algif_aead.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 48d46e74ed0d3..516b38c3a1695 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -201,11 +201,11 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		 * Encryption operation - The in-place cipher operation is
 		 * achieved by the following operation:
 		 *
-		 * TX SGL: AAD || PT || Tag
+		 * TX SGL: AAD || PT
 		 *	    |	   |
 		 *	    | copy |
 		 *	    v	   v
-		 * RX SGL: AAD || PT
+		 * RX SGL: AAD || PT || Tag
 		 */
 		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
 					   areq->first_rsgl.sgl.sg, processed);

From ac6b6f4531bb0d113ad8ba8c08247df793271a96 Mon Sep 17 00:00:00 2001
From: Lars Persson <lars.persson@axis.com>
Date: Thu, 10 Aug 2017 14:53:51 +0200
Subject: [PATCH 107/116] dt-bindings: crypto: add ARTPEC crypto

Document the device tree bindings for the ARTPEC crypto accelerator on
ARTPEC-6 and ARTPEC-7 SoCs.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lars Persson <larper@axis.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../bindings/crypto/artpec6-crypto.txt           | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/crypto/artpec6-crypto.txt

diff --git a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
new file mode 100644
index 0000000000000..d9cca4875bd64
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
@@ -0,0 +1,16 @@
+Axis crypto engine with PDMA interface.
+
+Required properties:
+- compatible : Should be one of the following strings:
+	"axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC
+	"axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC.
+- reg: Base address and size for the PDMA register area.
+- interrupts: Interrupt handle for the PDMA interrupt line.
+
+Example:
+
+crypto@f4264000 {
+	compatible = "axis,artpec6-crypto";
+	reg = <0xf4264000 0x1000>;
+	interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+};

From 6f7473c524cc4f875dcd9397ec9a6ec039bd08b6 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Thu, 10 Aug 2017 14:53:52 +0200
Subject: [PATCH 108/116] crypto: hash - add crypto_(un)register_ahashes()

There are already helpers to (un)register multiple normal
and AEAD algos.  Add one for ahashes too.

Signed-off-by: Lars Persson <larper@axis.com>
Signed-off-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ahash.c                 | 29 +++++++++++++++++++++++++++++
 include/crypto/internal/hash.h |  2 ++
 2 files changed, 31 insertions(+)

diff --git a/crypto/ahash.c b/crypto/ahash.c
index 826cd7ab4d4a2..5e8666e6ccaea 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -588,6 +588,35 @@ int crypto_unregister_ahash(struct ahash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_ahash);
 
+int crypto_register_ahashes(struct ahash_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_ahash(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (--i; i >= 0; --i)
+		crypto_unregister_ahash(&algs[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_ahashes);
+
+void crypto_unregister_ahashes(struct ahash_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_ahash(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_ahashes);
+
 int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst)
 {
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index f6d9af3efa45a..f0b44c16e88f2 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -76,6 +76,8 @@ static inline int crypto_ahash_walk_last(struct crypto_hash_walk *walk)
 
 int crypto_register_ahash(struct ahash_alg *alg);
 int crypto_unregister_ahash(struct ahash_alg *alg);
+int crypto_register_ahashes(struct ahash_alg *algs, int count);
+void crypto_unregister_ahashes(struct ahash_alg *algs, int count);
 int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst);
 void ahash_free_instance(struct crypto_instance *inst);

From a21eb94fc4d3c6472de53bd30a543ec06eaf8914 Mon Sep 17 00:00:00 2001
From: Lars Persson <lars.persson@axis.com>
Date: Thu, 10 Aug 2017 14:53:53 +0200
Subject: [PATCH 109/116] crypto: axis - add ARTPEC-6/7 crypto accelerator
 driver

This is an asynchronous crypto API driver for the accelerator present
in the ARTPEC-6 and -7 SoCs from Axis Communications AB.

The driver supports AES in ECB/CTR/CBC/XTS/GCM modes and SHA1/2 hash
standards.

Signed-off-by: Lars Persson <larper@axis.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig               |   21 +
 drivers/crypto/Makefile              |    1 +
 drivers/crypto/axis/Makefile         |    1 +
 drivers/crypto/axis/artpec6_crypto.c | 3192 ++++++++++++++++++++++++++
 4 files changed, 3215 insertions(+)
 create mode 100644 drivers/crypto/axis/Makefile
 create mode 100644 drivers/crypto/axis/artpec6_crypto.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 5b5393f1b87ab..fe33c199fc1a5 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -708,4 +708,25 @@ config CRYPTO_DEV_SAFEXCEL
 	  chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash
 	  algorithms.
 
+config CRYPTO_DEV_ARTPEC6
+	tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
+	depends on ARM && (ARCH_ARTPEC || COMPILE_TEST)
+	depends on HAS_DMA
+	depends on OF
+	select CRYPTO_AEAD
+	select CRYPTO_AES
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_CTR
+	select CRYPTO_HASH
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA384
+	select CRYPTO_SHA512
+	help
+	  Enables the driver for the on-chip crypto accelerator
+	  of Axis ARTPEC SoCs.
+
+	  To compile this driver as a module, choose M here.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index b12eb3c994302..808432b44c6bb 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -44,3 +44,4 @@ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
+obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
diff --git a/drivers/crypto/axis/Makefile b/drivers/crypto/axis/Makefile
new file mode 100644
index 0000000000000..be9a84a4b6672
--- /dev/null
+++ b/drivers/crypto/axis/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) := artpec6_crypto.o
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
new file mode 100644
index 0000000000000..d9fbbf01062bc
--- /dev/null
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -0,0 +1,3192 @@
+/*
+ *   Driver for ARTPEC-6 crypto block using the kernel asynchronous crypto api.
+ *
+ *    Copyright (C) 2014-2017  Axis Communications AB
+ */
+#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
+
+#include <linux/bitfield.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/fault-inject.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include <crypto/aes.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/xts.h>
+
+/* Max length of a line in all cache levels for Artpec SoCs. */
+#define ARTPEC_CACHE_LINE_MAX	32
+
+#define PDMA_OUT_CFG		0x0000
+#define PDMA_OUT_BUF_CFG	0x0004
+#define PDMA_OUT_CMD		0x0008
+#define PDMA_OUT_DESCRQ_PUSH	0x0010
+#define PDMA_OUT_DESCRQ_STAT	0x0014
+
+#define A6_PDMA_IN_CFG		0x0028
+#define A6_PDMA_IN_BUF_CFG	0x002c
+#define A6_PDMA_IN_CMD		0x0030
+#define A6_PDMA_IN_STATQ_PUSH	0x0038
+#define A6_PDMA_IN_DESCRQ_PUSH	0x0044
+#define A6_PDMA_IN_DESCRQ_STAT	0x0048
+#define A6_PDMA_INTR_MASK	0x0068
+#define A6_PDMA_ACK_INTR	0x006c
+#define A6_PDMA_MASKED_INTR	0x0074
+
+#define A7_PDMA_IN_CFG		0x002c
+#define A7_PDMA_IN_BUF_CFG	0x0030
+#define A7_PDMA_IN_CMD		0x0034
+#define A7_PDMA_IN_STATQ_PUSH	0x003c
+#define A7_PDMA_IN_DESCRQ_PUSH	0x0048
+#define A7_PDMA_IN_DESCRQ_STAT	0x004C
+#define A7_PDMA_INTR_MASK	0x006c
+#define A7_PDMA_ACK_INTR	0x0070
+#define A7_PDMA_MASKED_INTR	0x0078
+
+#define PDMA_OUT_CFG_EN				BIT(0)
+
+#define PDMA_OUT_BUF_CFG_DATA_BUF_SIZE		GENMASK(4, 0)
+#define PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE		GENMASK(9, 5)
+
+#define PDMA_OUT_CMD_START			BIT(0)
+#define A6_PDMA_OUT_CMD_STOP			BIT(3)
+#define A7_PDMA_OUT_CMD_STOP			BIT(2)
+
+#define PDMA_OUT_DESCRQ_PUSH_LEN		GENMASK(5, 0)
+#define PDMA_OUT_DESCRQ_PUSH_ADDR		GENMASK(31, 6)
+
+#define PDMA_OUT_DESCRQ_STAT_LEVEL		GENMASK(3, 0)
+#define PDMA_OUT_DESCRQ_STAT_SIZE		GENMASK(7, 4)
+
+#define PDMA_IN_CFG_EN				BIT(0)
+
+#define PDMA_IN_BUF_CFG_DATA_BUF_SIZE		GENMASK(4, 0)
+#define PDMA_IN_BUF_CFG_DESCR_BUF_SIZE		GENMASK(9, 5)
+#define PDMA_IN_BUF_CFG_STAT_BUF_SIZE		GENMASK(14, 10)
+
+#define PDMA_IN_CMD_START			BIT(0)
+#define A6_PDMA_IN_CMD_FLUSH_STAT		BIT(2)
+#define A6_PDMA_IN_CMD_STOP			BIT(3)
+#define A7_PDMA_IN_CMD_FLUSH_STAT		BIT(1)
+#define A7_PDMA_IN_CMD_STOP			BIT(2)
+
+#define PDMA_IN_STATQ_PUSH_LEN			GENMASK(5, 0)
+#define PDMA_IN_STATQ_PUSH_ADDR			GENMASK(31, 6)
+
+#define PDMA_IN_DESCRQ_PUSH_LEN			GENMASK(5, 0)
+#define PDMA_IN_DESCRQ_PUSH_ADDR		GENMASK(31, 6)
+
+#define PDMA_IN_DESCRQ_STAT_LEVEL		GENMASK(3, 0)
+#define PDMA_IN_DESCRQ_STAT_SIZE		GENMASK(7, 4)
+
+#define A6_PDMA_INTR_MASK_IN_DATA		BIT(2)
+#define A6_PDMA_INTR_MASK_IN_EOP		BIT(3)
+#define A6_PDMA_INTR_MASK_IN_EOP_FLUSH		BIT(4)
+
+#define A7_PDMA_INTR_MASK_IN_DATA		BIT(3)
+#define A7_PDMA_INTR_MASK_IN_EOP		BIT(4)
+#define A7_PDMA_INTR_MASK_IN_EOP_FLUSH		BIT(5)
+
+#define A6_CRY_MD_OPER		GENMASK(19, 16)
+
+#define A6_CRY_MD_HASH_SEL_CTX	GENMASK(21, 20)
+#define A6_CRY_MD_HASH_HMAC_FIN	BIT(23)
+
+#define A6_CRY_MD_CIPHER_LEN	GENMASK(21, 20)
+#define A6_CRY_MD_CIPHER_DECR	BIT(22)
+#define A6_CRY_MD_CIPHER_TWEAK	BIT(23)
+#define A6_CRY_MD_CIPHER_DSEQ	BIT(24)
+
+#define A7_CRY_MD_OPER		GENMASK(11, 8)
+
+#define A7_CRY_MD_HASH_SEL_CTX	GENMASK(13, 12)
+#define A7_CRY_MD_HASH_HMAC_FIN	BIT(15)
+
+#define A7_CRY_MD_CIPHER_LEN	GENMASK(13, 12)
+#define A7_CRY_MD_CIPHER_DECR	BIT(14)
+#define A7_CRY_MD_CIPHER_TWEAK	BIT(15)
+#define A7_CRY_MD_CIPHER_DSEQ	BIT(16)
+
+/* DMA metadata constants */
+#define regk_crypto_aes_cbc     0x00000002
+#define regk_crypto_aes_ctr     0x00000003
+#define regk_crypto_aes_ecb     0x00000001
+#define regk_crypto_aes_gcm     0x00000004
+#define regk_crypto_aes_xts     0x00000005
+#define regk_crypto_cache       0x00000002
+#define a6_regk_crypto_dlkey    0x0000000a
+#define a7_regk_crypto_dlkey    0x0000000e
+#define regk_crypto_ext         0x00000001
+#define regk_crypto_hmac_sha1   0x00000007
+#define regk_crypto_hmac_sha256 0x00000009
+#define regk_crypto_hmac_sha384 0x0000000b
+#define regk_crypto_hmac_sha512 0x0000000d
+#define regk_crypto_init        0x00000000
+#define regk_crypto_key_128     0x00000000
+#define regk_crypto_key_192     0x00000001
+#define regk_crypto_key_256     0x00000002
+#define regk_crypto_null        0x00000000
+#define regk_crypto_sha1        0x00000006
+#define regk_crypto_sha256      0x00000008
+#define regk_crypto_sha384      0x0000000a
+#define regk_crypto_sha512      0x0000000c
+
+/* DMA descriptor structures */
+struct pdma_descr_ctrl  {
+	unsigned char short_descr : 1;
+	unsigned char pad1        : 1;
+	unsigned char eop         : 1;
+	unsigned char intr        : 1;
+	unsigned char short_len   : 3;
+	unsigned char pad2        : 1;
+} __packed;
+
+struct pdma_data_descr {
+	unsigned int len : 24;
+	unsigned int buf : 32;
+} __packed;
+
+struct pdma_short_descr {
+	unsigned char data[7];
+} __packed;
+
+struct pdma_descr {
+	struct pdma_descr_ctrl ctrl;
+	union {
+		struct pdma_data_descr   data;
+		struct pdma_short_descr  shrt;
+	};
+};
+
+struct pdma_stat_descr {
+	unsigned char pad1        : 1;
+	unsigned char pad2        : 1;
+	unsigned char eop         : 1;
+	unsigned char pad3        : 5;
+	unsigned int  len         : 24;
+};
+
+/* Each descriptor array can hold max 64 entries */
+#define PDMA_DESCR_COUNT	64
+
+#define MODULE_NAME   "Artpec-6 CA"
+
+/* Hash modes (including HMAC variants) */
+#define ARTPEC6_CRYPTO_HASH_SHA1	1
+#define ARTPEC6_CRYPTO_HASH_SHA256	2
+#define ARTPEC6_CRYPTO_HASH_SHA384	3
+#define ARTPEC6_CRYPTO_HASH_SHA512	4
+
+/* Crypto modes */
+#define ARTPEC6_CRYPTO_CIPHER_AES_ECB	1
+#define ARTPEC6_CRYPTO_CIPHER_AES_CBC	2
+#define ARTPEC6_CRYPTO_CIPHER_AES_CTR	3
+#define ARTPEC6_CRYPTO_CIPHER_AES_XTS	5
+
+/* The PDMA is a DMA-engine tightly coupled with a ciphering engine.
+ * It operates on a descriptor array with up to 64 descriptor entries.
+ * The arrays must be 64 byte aligned in memory.
+ *
+ * The ciphering unit has no registers and is completely controlled by
+ * a 4-byte metadata that is inserted at the beginning of each dma packet.
+ *
+ * A dma packet is a sequence of descriptors terminated by setting the .eop
+ * field in the final descriptor of the packet.
+ *
+ * Multiple packets are used for providing context data, key data and
+ * the plain/ciphertext.
+ *
+ *   PDMA Descriptors (Array)
+ *  +------+------+------+~~+-------+------+----
+ *  |  0   |  1   |  2   |~~| 11 EOP|  12  |  ....
+ *  +--+---+--+---+----+-+~~+-------+----+-+----
+ *     |      |        |       |         |
+ *     |      |        |       |         |
+ *   __|__  +-------++-------++-------+ +----+
+ *  | MD  | |Payload||Payload||Payload| | MD |
+ *  +-----+ +-------++-------++-------+ +----+
+ */
+
+struct artpec6_crypto_bounce_buffer {
+	struct list_head list;
+	size_t length;
+	struct scatterlist *sg;
+	size_t offset;
+	/* buf is aligned to ARTPEC_CACHE_LINE_MAX and
+	 * holds up to ARTPEC_CACHE_LINE_MAX bytes data.
+	 */
+	void *buf;
+};
+
+struct artpec6_crypto_dma_map {
+	dma_addr_t dma_addr;
+	size_t size;
+	enum dma_data_direction dir;
+};
+
+struct artpec6_crypto_dma_descriptors {
+	struct pdma_descr out[PDMA_DESCR_COUNT] __aligned(64);
+	struct pdma_descr in[PDMA_DESCR_COUNT] __aligned(64);
+	u32 stat[PDMA_DESCR_COUNT] __aligned(64);
+	struct list_head bounce_buffers;
+	/* Enough maps for all out/in buffers, and all three descr. arrays */
+	struct artpec6_crypto_dma_map maps[PDMA_DESCR_COUNT * 2 + 2];
+	dma_addr_t out_dma_addr;
+	dma_addr_t in_dma_addr;
+	dma_addr_t stat_dma_addr;
+	size_t out_cnt;
+	size_t in_cnt;
+	size_t map_count;
+};
+
+enum artpec6_crypto_variant {
+	ARTPEC6_CRYPTO,
+	ARTPEC7_CRYPTO,
+};
+
+struct artpec6_crypto {
+	void __iomem *base;
+	spinlock_t queue_lock;
+	struct list_head queue; /* waiting for pdma fifo space */
+	struct list_head pending; /* submitted to pdma fifo */
+	struct tasklet_struct task;
+	struct kmem_cache *dma_cache;
+	int pending_count;
+	struct timer_list timer;
+	enum artpec6_crypto_variant variant;
+	void *pad_buffer; /* cache-aligned block padding buffer */
+	void *zero_buffer;
+};
+
+enum artpec6_crypto_hash_flags {
+	HASH_FLAG_INIT_CTX = 2,
+	HASH_FLAG_UPDATE = 4,
+	HASH_FLAG_FINALIZE = 8,
+	HASH_FLAG_HMAC = 16,
+	HASH_FLAG_UPDATE_KEY = 32,
+};
+
+struct artpec6_crypto_req_common {
+	struct list_head list;
+	struct artpec6_crypto_dma_descriptors *dma;
+	struct crypto_async_request *req;
+	void (*complete)(struct crypto_async_request *req);
+	gfp_t gfp_flags;
+};
+
+struct artpec6_hash_request_context {
+	char partial_buffer[SHA512_BLOCK_SIZE];
+	char partial_buffer_out[SHA512_BLOCK_SIZE];
+	char key_buffer[SHA512_BLOCK_SIZE];
+	char pad_buffer[SHA512_BLOCK_SIZE + 32];
+	unsigned char digeststate[SHA512_DIGEST_SIZE];
+	size_t partial_bytes;
+	u64 digcnt;
+	u32 key_md;
+	u32 hash_md;
+	enum artpec6_crypto_hash_flags hash_flags;
+	struct artpec6_crypto_req_common common;
+};
+
+struct artpec6_hash_export_state {
+	char partial_buffer[SHA512_BLOCK_SIZE];
+	unsigned char digeststate[SHA512_DIGEST_SIZE];
+	size_t partial_bytes;
+	u64 digcnt;
+	int oper;
+	unsigned int hash_flags;
+};
+
+struct artpec6_hashalg_context {
+	char hmac_key[SHA512_BLOCK_SIZE];
+	size_t hmac_key_length;
+	struct crypto_shash *child_hash;
+};
+
+struct artpec6_crypto_request_context {
+	u32 cipher_md;
+	bool decrypt;
+	struct artpec6_crypto_req_common common;
+};
+
+struct artpec6_cryptotfm_context {
+	unsigned char aes_key[2*AES_MAX_KEY_SIZE];
+	size_t key_length;
+	u32 key_md;
+	int crypto_type;
+	struct crypto_skcipher *fallback;
+};
+
+struct artpec6_crypto_aead_hw_ctx {
+	__be64	aad_length_bits;
+	__be64  text_length_bits;
+	__u8	J0[AES_BLOCK_SIZE];
+};
+
+struct artpec6_crypto_aead_req_ctx {
+	struct artpec6_crypto_aead_hw_ctx hw_ctx;
+	u32 cipher_md;
+	bool decrypt;
+	struct artpec6_crypto_req_common common;
+	__u8 decryption_tag[AES_BLOCK_SIZE] ____cacheline_aligned;
+};
+
+/* The crypto framework makes it hard to avoid this global. */
+static struct device *artpec6_crypto_dev;
+
+static struct dentry *dbgfs_root;
+
+#ifdef CONFIG_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(artpec6_crypto_fail_status_read);
+static DECLARE_FAULT_ATTR(artpec6_crypto_fail_dma_array_full);
+#endif
+
+enum {
+	ARTPEC6_CRYPTO_PREPARE_HASH_NO_START,
+	ARTPEC6_CRYPTO_PREPARE_HASH_START,
+};
+
+static int artpec6_crypto_prepare_aead(struct aead_request *areq);
+static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq);
+static int artpec6_crypto_prepare_hash(struct ahash_request *areq);
+
+static void
+artpec6_crypto_complete_crypto(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_aead(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_hash(struct crypto_async_request *req);
+
+static int
+artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common);
+
+static void
+artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common);
+
+struct artpec6_crypto_walk {
+	struct scatterlist *sg;
+	size_t offset;
+};
+
+static void artpec6_crypto_walk_init(struct artpec6_crypto_walk *awalk,
+				     struct scatterlist *sg)
+{
+	awalk->sg = sg;
+	awalk->offset = 0;
+}
+
+static size_t artpec6_crypto_walk_advance(struct artpec6_crypto_walk *awalk,
+					  size_t nbytes)
+{
+	while (nbytes && awalk->sg) {
+		size_t piece;
+
+		WARN_ON(awalk->offset > awalk->sg->length);
+
+		piece = min(nbytes, (size_t)awalk->sg->length - awalk->offset);
+		nbytes -= piece;
+		awalk->offset += piece;
+		if (awalk->offset == awalk->sg->length) {
+			awalk->sg = sg_next(awalk->sg);
+			awalk->offset = 0;
+		}
+
+	}
+
+	return nbytes;
+}
+
+static size_t
+artpec6_crypto_walk_chunklen(const struct artpec6_crypto_walk *awalk)
+{
+	WARN_ON(awalk->sg->length == awalk->offset);
+
+	return awalk->sg->length - awalk->offset;
+}
+
+static dma_addr_t
+artpec6_crypto_walk_chunk_phys(const struct artpec6_crypto_walk *awalk)
+{
+	return sg_phys(awalk->sg) + awalk->offset;
+}
+
+static void
+artpec6_crypto_copy_bounce_buffers(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct artpec6_crypto_bounce_buffer *b;
+	struct artpec6_crypto_bounce_buffer *next;
+
+	list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) {
+		pr_debug("bounce entry %p: %zu bytes @ %zu from %p\n",
+			 b, b->length, b->offset, b->buf);
+		sg_pcopy_from_buffer(b->sg,
+				   1,
+				   b->buf,
+				   b->length,
+				   b->offset);
+
+		list_del(&b->list);
+		kfree(b);
+	}
+}
+
+static inline bool artpec6_crypto_busy(void)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	int fifo_count = ac->pending_count;
+
+	return fifo_count > 6;
+}
+
+static int artpec6_crypto_submit(struct artpec6_crypto_req_common *req)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	int ret = -EBUSY;
+
+	spin_lock_bh(&ac->queue_lock);
+
+	if (!artpec6_crypto_busy()) {
+		list_add_tail(&req->list, &ac->pending);
+		artpec6_crypto_start_dma(req);
+		ret = -EINPROGRESS;
+	} else if (req->req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) {
+		list_add_tail(&req->list, &ac->queue);
+	} else {
+		artpec6_crypto_common_destroy(req);
+	}
+
+	spin_unlock_bh(&ac->queue_lock);
+
+	return ret;
+}
+
+static void artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	u32 ind, statd, outd;
+
+	/* Make descriptor content visible to the DMA before starting it. */
+	wmb();
+
+	ind = FIELD_PREP(PDMA_IN_DESCRQ_PUSH_LEN, dma->in_cnt - 1) |
+	      FIELD_PREP(PDMA_IN_DESCRQ_PUSH_ADDR, dma->in_dma_addr >> 6);
+
+	statd = FIELD_PREP(PDMA_IN_STATQ_PUSH_LEN, dma->in_cnt - 1) |
+		FIELD_PREP(PDMA_IN_STATQ_PUSH_ADDR, dma->stat_dma_addr >> 6);
+
+	outd = FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_LEN, dma->out_cnt - 1) |
+	       FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_ADDR, dma->out_dma_addr >> 6);
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(ind, base + A6_PDMA_IN_DESCRQ_PUSH);
+		writel_relaxed(statd, base + A6_PDMA_IN_STATQ_PUSH);
+		writel_relaxed(PDMA_IN_CMD_START, base + A6_PDMA_IN_CMD);
+	} else {
+		writel_relaxed(ind, base + A7_PDMA_IN_DESCRQ_PUSH);
+		writel_relaxed(statd, base + A7_PDMA_IN_STATQ_PUSH);
+		writel_relaxed(PDMA_IN_CMD_START, base + A7_PDMA_IN_CMD);
+	}
+
+	writel_relaxed(outd, base + PDMA_OUT_DESCRQ_PUSH);
+	writel_relaxed(PDMA_OUT_CMD_START, base + PDMA_OUT_CMD);
+
+	ac->pending_count++;
+}
+
+static void
+artpec6_crypto_init_dma_operation(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+
+	dma->out_cnt = 0;
+	dma->in_cnt = 0;
+	dma->map_count = 0;
+	INIT_LIST_HEAD(&dma->bounce_buffers);
+}
+
+static bool fault_inject_dma_descr(void)
+{
+#ifdef CONFIG_FAULT_INJECTION
+	return should_fail(&artpec6_crypto_fail_dma_array_full, 1);
+#else
+	return false;
+#endif
+}
+
+/** artpec6_crypto_setup_out_descr_phys - Setup an out channel with a
+ *                                        physical address
+ *
+ * @addr: The physical address of the data buffer
+ * @len:  The length of the data buffer
+ * @eop:  True if this is the last buffer in the packet
+ *
+ * @return 0 on success or -ENOSPC if there are no more descriptors available
+ */
+static int
+artpec6_crypto_setup_out_descr_phys(struct artpec6_crypto_req_common *common,
+				    dma_addr_t addr, size_t len, bool eop)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->out_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free OUT DMA descriptors available!\n");
+		return -ENOSPC;
+	}
+
+	d = &dma->out[dma->out_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.short_descr = 0;
+	d->ctrl.eop = eop;
+	d->data.len = len;
+	d->data.buf = addr;
+	return 0;
+}
+
+/** artpec6_crypto_setup_out_descr_short - Setup a short out descriptor
+ *
+ * @dst: The virtual address of the data
+ * @len: The length of the data, must be between 1 to 7 bytes
+ * @eop: True if this is the last buffer in the packet
+ *
+ * @return 0 on success
+ *	-ENOSPC if no more descriptors are available
+ *	-EINVAL if the data length exceeds 7 bytes
+ */
+static int
+artpec6_crypto_setup_out_descr_short(struct artpec6_crypto_req_common *common,
+				     void *dst, unsigned int len, bool eop)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->out_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free OUT DMA descriptors available!\n");
+		return -ENOSPC;
+	} else if (len > 7 || len < 1) {
+		return -EINVAL;
+	}
+	d = &dma->out[dma->out_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.short_descr = 1;
+	d->ctrl.short_len = len;
+	d->ctrl.eop = eop;
+	memcpy(d->shrt.data, dst, len);
+	return 0;
+}
+
+static int artpec6_crypto_dma_map_page(struct artpec6_crypto_req_common *common,
+				      struct page *page, size_t offset,
+				      size_t size,
+				      enum dma_data_direction dir,
+				      dma_addr_t *dma_addr_out)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct device *dev = artpec6_crypto_dev;
+	struct artpec6_crypto_dma_map *map;
+	dma_addr_t dma_addr;
+
+	*dma_addr_out = 0;
+
+	if (dma->map_count >= ARRAY_SIZE(dma->maps))
+		return -ENOMEM;
+
+	dma_addr = dma_map_page(dev, page, offset, size, dir);
+	if (dma_mapping_error(dev, dma_addr))
+		return -ENOMEM;
+
+	map = &dma->maps[dma->map_count++];
+	map->size = size;
+	map->dma_addr = dma_addr;
+	map->dir = dir;
+
+	*dma_addr_out = dma_addr;
+
+	return 0;
+}
+
+static int
+artpec6_crypto_dma_map_single(struct artpec6_crypto_req_common *common,
+			      void *ptr, size_t size,
+			      enum dma_data_direction dir,
+			      dma_addr_t *dma_addr_out)
+{
+	struct page *page = virt_to_page(ptr);
+	size_t offset = (uintptr_t)ptr & ~PAGE_MASK;
+
+	return artpec6_crypto_dma_map_page(common, page, offset, size, dir,
+					  dma_addr_out);
+}
+
+static int
+artpec6_crypto_dma_map_descs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	int ret;
+
+	ret = artpec6_crypto_dma_map_single(common, dma->in,
+				sizeof(dma->in[0]) * dma->in_cnt,
+				DMA_TO_DEVICE, &dma->in_dma_addr);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_dma_map_single(common, dma->out,
+				sizeof(dma->out[0]) * dma->out_cnt,
+				DMA_TO_DEVICE, &dma->out_dma_addr);
+	if (ret)
+		return ret;
+
+	/* We only read one stat descriptor */
+	dma->stat[dma->in_cnt - 1] = 0;
+
+	/*
+	 * DMA_BIDIRECTIONAL since we need our zeroing of the stat descriptor
+	 * to be written.
+	 */
+	return artpec6_crypto_dma_map_single(common,
+				dma->stat + dma->in_cnt - 1,
+				sizeof(dma->stat[0]),
+				DMA_BIDIRECTIONAL,
+				&dma->stat_dma_addr);
+}
+
+static void
+artpec6_crypto_dma_unmap_all(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct device *dev = artpec6_crypto_dev;
+	int i;
+
+	for (i = 0; i < dma->map_count; i++) {
+		struct artpec6_crypto_dma_map *map = &dma->maps[i];
+
+		dma_unmap_page(dev, map->dma_addr, map->size, map->dir);
+	}
+
+	dma->map_count = 0;
+}
+
+/** artpec6_crypto_setup_out_descr - Setup an out descriptor
+ *
+ * @dst: The virtual address of the data
+ * @len: The length of the data
+ * @eop: True if this is the last buffer in the packet
+ * @use_short: If this is true and the data length is 7 bytes or less then
+ *	a short descriptor will be used
+ *
+ * @return 0 on success
+ *	Any errors from artpec6_crypto_setup_out_descr_short() or
+ *	setup_out_descr_phys()
+ */
+static int
+artpec6_crypto_setup_out_descr(struct artpec6_crypto_req_common *common,
+			       void *dst, unsigned int len, bool eop,
+			       bool use_short)
+{
+	if (use_short && len < 7) {
+		return artpec6_crypto_setup_out_descr_short(common, dst, len,
+							    eop);
+	} else {
+		int ret;
+		dma_addr_t dma_addr;
+
+		ret = artpec6_crypto_dma_map_single(common, dst, len,
+						   DMA_TO_DEVICE,
+						   &dma_addr);
+		if (ret)
+			return ret;
+
+		return artpec6_crypto_setup_out_descr_phys(common, dma_addr,
+							   len, eop);
+	}
+}
+
+/** artpec6_crypto_setup_in_descr_phys - Setup an in channel with a
+ *                                       physical address
+ *
+ * @addr: The physical address of the data buffer
+ * @len:  The length of the data buffer
+ * @intr: True if an interrupt should be fired after HW processing of this
+ *	  descriptor
+ *
+ */
+static int
+artpec6_crypto_setup_in_descr_phys(struct artpec6_crypto_req_common *common,
+			       dma_addr_t addr, unsigned int len, bool intr)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->in_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free IN DMA descriptors available!\n");
+		return -ENOSPC;
+	}
+	d = &dma->in[dma->in_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.intr = intr;
+	d->data.len = len;
+	d->data.buf = addr;
+	return 0;
+}
+
+/** artpec6_crypto_setup_in_descr - Setup an in channel descriptor
+ *
+ * @buffer: The virtual address to of the data buffer
+ * @len:    The length of the data buffer
+ * @last:   If this is the last data buffer in the request (i.e. an interrupt
+ *	    is needed
+ *
+ * Short descriptors are not used for the in channel
+ */
+static int
+artpec6_crypto_setup_in_descr(struct artpec6_crypto_req_common *common,
+			  void *buffer, unsigned int len, bool last)
+{
+	dma_addr_t dma_addr;
+	int ret;
+
+	ret = artpec6_crypto_dma_map_single(common, buffer, len,
+					   DMA_FROM_DEVICE, &dma_addr);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_setup_in_descr_phys(common, dma_addr, len, last);
+}
+
+static struct artpec6_crypto_bounce_buffer *
+artpec6_crypto_alloc_bounce(gfp_t flags)
+{
+	void *base;
+	size_t alloc_size = sizeof(struct artpec6_crypto_bounce_buffer) +
+			    2 * ARTPEC_CACHE_LINE_MAX;
+	struct artpec6_crypto_bounce_buffer *bbuf = kzalloc(alloc_size, flags);
+
+	if (!bbuf)
+		return NULL;
+
+	base = bbuf + 1;
+	bbuf->buf = PTR_ALIGN(base, ARTPEC_CACHE_LINE_MAX);
+	return bbuf;
+}
+
+static int setup_bounce_buffer_in(struct artpec6_crypto_req_common *common,
+				  struct artpec6_crypto_walk *walk, size_t size)
+{
+	struct artpec6_crypto_bounce_buffer *bbuf;
+	int ret;
+
+	bbuf = artpec6_crypto_alloc_bounce(common->gfp_flags);
+	if (!bbuf)
+		return -ENOMEM;
+
+	bbuf->length = size;
+	bbuf->sg = walk->sg;
+	bbuf->offset = walk->offset;
+
+	ret =  artpec6_crypto_setup_in_descr(common, bbuf->buf, size, false);
+	if (ret) {
+		kfree(bbuf);
+		return ret;
+	}
+
+	pr_debug("BOUNCE %zu offset %zu\n", size, walk->offset);
+	list_add_tail(&bbuf->list, &common->dma->bounce_buffers);
+	return 0;
+}
+
+static int
+artpec6_crypto_setup_sg_descrs_in(struct artpec6_crypto_req_common *common,
+				  struct artpec6_crypto_walk *walk,
+				  size_t count)
+{
+	size_t chunk;
+	int ret;
+	dma_addr_t addr;
+
+	while (walk->sg && count) {
+		chunk = min(count, artpec6_crypto_walk_chunklen(walk));
+		addr = artpec6_crypto_walk_chunk_phys(walk);
+
+		/* When destination buffers are not aligned to the cache line
+		 * size we need bounce buffers. The DMA-API requires that the
+		 * entire line is owned by the DMA buffer and this holds also
+		 * for the case when coherent DMA is used.
+		 */
+		if (!IS_ALIGNED(addr, ARTPEC_CACHE_LINE_MAX)) {
+			chunk = min_t(dma_addr_t, chunk,
+				      ALIGN(addr, ARTPEC_CACHE_LINE_MAX) -
+				      addr);
+
+			pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk);
+			ret = setup_bounce_buffer_in(common, walk, chunk);
+		} else if (chunk < ARTPEC_CACHE_LINE_MAX) {
+			pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk);
+			ret = setup_bounce_buffer_in(common, walk, chunk);
+		} else {
+			dma_addr_t dma_addr;
+
+			chunk = chunk & ~(ARTPEC_CACHE_LINE_MAX-1);
+
+			pr_debug("CHUNK %pad:%zu\n", &addr, chunk);
+
+			ret = artpec6_crypto_dma_map_page(common,
+							 sg_page(walk->sg),
+							 walk->sg->offset +
+							 walk->offset,
+							 chunk,
+							 DMA_FROM_DEVICE,
+							 &dma_addr);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_in_descr_phys(common,
+								 dma_addr,
+								 chunk, false);
+		}
+
+		if (ret)
+			return ret;
+
+		count = count - chunk;
+		artpec6_crypto_walk_advance(walk, chunk);
+	}
+
+	if (count)
+		pr_err("EOL unexpected %zu bytes left\n", count);
+
+	return count ? -EINVAL : 0;
+}
+
+static int
+artpec6_crypto_setup_sg_descrs_out(struct artpec6_crypto_req_common *common,
+				   struct artpec6_crypto_walk *walk,
+				   size_t count)
+{
+	size_t chunk;
+	int ret;
+	dma_addr_t addr;
+
+	while (walk->sg && count) {
+		chunk = min(count, artpec6_crypto_walk_chunklen(walk));
+		addr = artpec6_crypto_walk_chunk_phys(walk);
+
+		pr_debug("OUT-CHUNK %pad:%zu\n", &addr, chunk);
+
+		if (addr & 3) {
+			char buf[3];
+
+			chunk = min_t(size_t, chunk, (4-(addr&3)));
+
+			sg_pcopy_to_buffer(walk->sg, 1, buf, chunk,
+					   walk->offset);
+
+			ret = artpec6_crypto_setup_out_descr_short(common, buf,
+								   chunk,
+								   false);
+		} else {
+			dma_addr_t dma_addr;
+
+			ret = artpec6_crypto_dma_map_page(common,
+							 sg_page(walk->sg),
+							 walk->sg->offset +
+							 walk->offset,
+							 chunk,
+							 DMA_TO_DEVICE,
+							 &dma_addr);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_out_descr_phys(common,
+								 dma_addr,
+								 chunk, false);
+		}
+
+		if (ret)
+			return ret;
+
+		count = count - chunk;
+		artpec6_crypto_walk_advance(walk, chunk);
+	}
+
+	if (count)
+		pr_err("EOL unexpected %zu bytes left\n", count);
+
+	return count ? -EINVAL : 0;
+}
+
+
+/** artpec6_crypto_terminate_out_descrs - Set the EOP on the last out descriptor
+ *
+ * If the out descriptor list is non-empty, then the eop flag on the
+ * last used out descriptor will be set.
+ *
+ * @return  0 on success
+ *	-EINVAL if the out descriptor is empty or has overflown
+ */
+static int
+artpec6_crypto_terminate_out_descrs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (!dma->out_cnt || dma->out_cnt > PDMA_DESCR_COUNT) {
+		pr_err("%s: OUT descriptor list is %s\n",
+			MODULE_NAME, dma->out_cnt ? "empty" : "full");
+		return -EINVAL;
+
+	}
+
+	d = &dma->out[dma->out_cnt-1];
+	d->ctrl.eop = 1;
+
+	return 0;
+}
+
+/** artpec6_crypto_terminate_in_descrs - Set the interrupt flag on the last
+ *                                       in descriptor
+ *
+ * See artpec6_crypto_terminate_out_descrs() for return values
+ */
+static int
+artpec6_crypto_terminate_in_descrs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (!dma->in_cnt || dma->in_cnt > PDMA_DESCR_COUNT) {
+		pr_err("%s: IN descriptor list is %s\n",
+			MODULE_NAME, dma->in_cnt ? "empty" : "full");
+		return -EINVAL;
+	}
+
+	d = &dma->in[dma->in_cnt-1];
+	d->ctrl.intr = 1;
+	return 0;
+}
+
+/** create_hash_pad - Create a Secure Hash conformant pad
+ *
+ * @dst:      The destination buffer to write the pad. Must be at least 64 bytes
+ * @dgstlen:  The total length of the hash digest in bytes
+ * @bitcount: The total length of the digest in bits
+ *
+ * @return The total number of padding bytes written to @dst
+ */
+static size_t
+create_hash_pad(int oper, unsigned char *dst, u64 dgstlen, u64 bitcount)
+{
+	unsigned int mod, target, diff, pad_bytes, size_bytes;
+	__be64 bits = __cpu_to_be64(bitcount);
+
+	switch (oper) {
+	case regk_crypto_sha1:
+	case regk_crypto_sha256:
+	case regk_crypto_hmac_sha1:
+	case regk_crypto_hmac_sha256:
+		target = 448 / 8;
+		mod = 512 / 8;
+		size_bytes = 8;
+		break;
+	default:
+		target = 896 / 8;
+		mod = 1024 / 8;
+		size_bytes = 16;
+		break;
+	}
+
+	target -= 1;
+	diff = dgstlen & (mod - 1);
+	pad_bytes = diff > target ? target + mod - diff : target - diff;
+
+	memset(dst + 1, 0, pad_bytes);
+	dst[0] = 0x80;
+
+	if (size_bytes == 16) {
+		memset(dst + 1 + pad_bytes, 0, 8);
+		memcpy(dst + 1 + pad_bytes + 8, &bits, 8);
+	} else {
+		memcpy(dst + 1 + pad_bytes, &bits, 8);
+	}
+
+	return pad_bytes + size_bytes + 1;
+}
+
+static int artpec6_crypto_common_init(struct artpec6_crypto_req_common *common,
+		struct crypto_async_request *parent,
+		void (*complete)(struct crypto_async_request *req),
+		struct scatterlist *dstsg, unsigned int nbytes)
+{
+	gfp_t flags;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+
+	flags = (parent->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		 GFP_KERNEL : GFP_ATOMIC;
+
+	common->gfp_flags = flags;
+	common->dma = kmem_cache_alloc(ac->dma_cache, flags);
+	if (!common->dma)
+		return -ENOMEM;
+
+	common->req = parent;
+	common->complete = complete;
+	return 0;
+}
+
+static void
+artpec6_crypto_bounce_destroy(struct artpec6_crypto_dma_descriptors *dma)
+{
+	struct artpec6_crypto_bounce_buffer *b;
+	struct artpec6_crypto_bounce_buffer *next;
+
+	list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) {
+		kfree(b);
+	}
+}
+
+static int
+artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+
+	artpec6_crypto_dma_unmap_all(common);
+	artpec6_crypto_bounce_destroy(common->dma);
+	kmem_cache_free(ac->dma_cache, common->dma);
+	common->dma = NULL;
+	return 0;
+}
+
+/*
+ * Ciphering functions.
+ */
+static int artpec6_crypto_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	void (*complete)(struct crypto_async_request *req);
+	int ret;
+
+	req_ctx = skcipher_request_ctx(req);
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		req_ctx->decrypt = 0;
+		break;
+	default:
+		break;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		complete = artpec6_crypto_complete_cbc_encrypt;
+		break;
+	default:
+		complete = artpec6_crypto_complete_crypto;
+		break;
+	}
+
+	ret = artpec6_crypto_common_init(&req_ctx->common,
+				  &req->base,
+				  complete,
+				  req->dst, req->cryptlen);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_crypto(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_decrypt(struct skcipher_request *req)
+{
+	int ret;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	void (*complete)(struct crypto_async_request *req);
+
+	req_ctx = skcipher_request_ctx(req);
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		req_ctx->decrypt = 1;
+		break;
+	default:
+		break;
+	}
+
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		complete = artpec6_crypto_complete_cbc_decrypt;
+		break;
+	default:
+		complete = artpec6_crypto_complete_crypto;
+		break;
+	}
+
+	ret = artpec6_crypto_common_init(&req_ctx->common, &req->base,
+				  complete,
+				  req->dst, req->cryptlen);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_crypto(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int
+artpec6_crypto_ctr_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	unsigned int counter = be32_to_cpup((__be32 *)
+					    (req->iv + iv_len - 4));
+	unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) /
+			     AES_BLOCK_SIZE;
+
+	/*
+	 * The hardware uses only the last 32-bits as the counter while the
+	 * kernel tests (aes_ctr_enc_tv_template[4] for example) expect that
+	 * the whole IV is a counter.  So fallback if the counter is going to
+	 * overlow.
+	 */
+	if (counter + nblks < counter) {
+		int ret;
+
+		pr_debug("counter %x will overflow (nblks %u), falling back\n",
+			 counter, counter + nblks);
+
+		ret = crypto_skcipher_setkey(ctx->fallback, ctx->aes_key,
+					     ctx->key_length);
+		if (ret)
+			return ret;
+
+		{
+			SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+
+			skcipher_request_set_tfm(subreq, ctx->fallback);
+			skcipher_request_set_callback(subreq, req->base.flags,
+						      NULL, NULL);
+			skcipher_request_set_crypt(subreq, req->src, req->dst,
+						   req->cryptlen, req->iv);
+			ret = encrypt ? crypto_skcipher_encrypt(subreq)
+				      : crypto_skcipher_decrypt(subreq);
+			skcipher_request_zero(subreq);
+		}
+		return ret;
+	}
+
+	return encrypt ? artpec6_crypto_encrypt(req)
+		       : artpec6_crypto_decrypt(req);
+}
+
+static int artpec6_crypto_ctr_encrypt(struct skcipher_request *req)
+{
+	return artpec6_crypto_ctr_crypt(req, true);
+}
+
+static int artpec6_crypto_ctr_decrypt(struct skcipher_request *req)
+{
+	return artpec6_crypto_ctr_crypt(req, false);
+}
+
+/*
+ * AEAD functions
+ */
+static int artpec6_crypto_aead_init(struct crypto_aead *tfm)
+{
+	struct artpec6_cryptotfm_context *tfm_ctx = crypto_aead_ctx(tfm);
+
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	crypto_aead_set_reqsize(tfm,
+				sizeof(struct artpec6_crypto_aead_req_ctx));
+
+	return 0;
+}
+
+static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key,
+			       unsigned int len)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(&tfm->base);
+
+	if (len != 16 && len != 24 && len != 32) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -1;
+	}
+
+	ctx->key_length = len;
+
+	memcpy(ctx->aes_key, key, len);
+	return 0;
+}
+
+static int artpec6_crypto_aead_encrypt(struct aead_request *req)
+{
+	int ret;
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req);
+
+	req_ctx->decrypt = false;
+	ret = artpec6_crypto_common_init(&req_ctx->common, &req->base,
+				  artpec6_crypto_complete_aead,
+				  NULL, 0);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_aead(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_aead_decrypt(struct aead_request *req)
+{
+	int ret;
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req);
+
+	req_ctx->decrypt = true;
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	ret = artpec6_crypto_common_init(&req_ctx->common,
+				  &req->base,
+				  artpec6_crypto_complete_aead,
+				  NULL, 0);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_aead(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_prepare_hash(struct ahash_request *areq)
+{
+	struct artpec6_hashalg_context *ctx = crypto_tfm_ctx(areq->base.tfm);
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(areq);
+	size_t digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(areq));
+	size_t contextsize = digestsize == SHA384_DIGEST_SIZE ?
+		SHA512_DIGEST_SIZE : digestsize;
+	size_t blocksize = crypto_tfm_alg_blocksize(
+		crypto_ahash_tfm(crypto_ahash_reqtfm(areq)));
+	struct artpec6_crypto_req_common *common = &req_ctx->common;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	u32 sel_ctx;
+	bool ext_ctx = false;
+	bool run_hw = false;
+	int error = 0;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	/* Upload HMAC key, must be first the first packet */
+	if (req_ctx->hash_flags & HASH_FLAG_HMAC) {
+		if (variant == ARTPEC6_CRYPTO) {
+			req_ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER,
+						     a6_regk_crypto_dlkey);
+		} else {
+			req_ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER,
+						     a7_regk_crypto_dlkey);
+		}
+
+		/* Copy and pad up the key */
+		memcpy(req_ctx->key_buffer, ctx->hmac_key,
+		       ctx->hmac_key_length);
+		memset(req_ctx->key_buffer + ctx->hmac_key_length, 0,
+		       blocksize - ctx->hmac_key_length);
+
+		error = artpec6_crypto_setup_out_descr(common,
+					(void *)&req_ctx->key_md,
+					sizeof(req_ctx->key_md), false, false);
+		if (error)
+			return error;
+
+		error = artpec6_crypto_setup_out_descr(common,
+					req_ctx->key_buffer, blocksize,
+					true, false);
+		if (error)
+			return error;
+	}
+
+	if (!(req_ctx->hash_flags & HASH_FLAG_INIT_CTX)) {
+		/* Restore context */
+		sel_ctx = regk_crypto_ext;
+		ext_ctx = true;
+	} else {
+		sel_ctx = regk_crypto_init;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->hash_md &= ~A6_CRY_MD_HASH_SEL_CTX;
+		req_ctx->hash_md |= FIELD_PREP(A6_CRY_MD_HASH_SEL_CTX, sel_ctx);
+
+		/* If this is the final round, set the final flag */
+		if (req_ctx->hash_flags & HASH_FLAG_FINALIZE)
+			req_ctx->hash_md |= A6_CRY_MD_HASH_HMAC_FIN;
+	} else {
+		req_ctx->hash_md &= ~A7_CRY_MD_HASH_SEL_CTX;
+		req_ctx->hash_md |= FIELD_PREP(A7_CRY_MD_HASH_SEL_CTX, sel_ctx);
+
+		/* If this is the final round, set the final flag */
+		if (req_ctx->hash_flags & HASH_FLAG_FINALIZE)
+			req_ctx->hash_md |= A7_CRY_MD_HASH_HMAC_FIN;
+	}
+
+	/* Setup up metadata descriptors */
+	error = artpec6_crypto_setup_out_descr(common,
+				(void *)&req_ctx->hash_md,
+				sizeof(req_ctx->hash_md), false, false);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (error)
+		return error;
+
+	if (ext_ctx) {
+		error = artpec6_crypto_setup_out_descr(common,
+					req_ctx->digeststate,
+					contextsize, false, false);
+
+		if (error)
+			return error;
+	}
+
+	if (req_ctx->hash_flags & HASH_FLAG_UPDATE) {
+		size_t done_bytes = 0;
+		size_t total_bytes = areq->nbytes + req_ctx->partial_bytes;
+		size_t ready_bytes = round_down(total_bytes, blocksize);
+		struct artpec6_crypto_walk walk;
+
+		run_hw = ready_bytes > 0;
+		if (req_ctx->partial_bytes && ready_bytes) {
+			/* We have a partial buffer and will at least some bytes
+			 * to the HW. Empty this partial buffer before tackling
+			 * the SG lists
+			 */
+			memcpy(req_ctx->partial_buffer_out,
+				req_ctx->partial_buffer,
+				req_ctx->partial_bytes);
+
+			error = artpec6_crypto_setup_out_descr(common,
+						req_ctx->partial_buffer_out,
+						req_ctx->partial_bytes,
+						false, true);
+			if (error)
+				return error;
+
+			/* Reset partial buffer */
+			done_bytes += req_ctx->partial_bytes;
+			req_ctx->partial_bytes = 0;
+		}
+
+		artpec6_crypto_walk_init(&walk, areq->src);
+
+		error = artpec6_crypto_setup_sg_descrs_out(common, &walk,
+							   ready_bytes -
+							   done_bytes);
+		if (error)
+			return error;
+
+		if (walk.sg) {
+			size_t sg_skip = ready_bytes - done_bytes;
+			size_t sg_rem = areq->nbytes - sg_skip;
+
+			sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+					   req_ctx->partial_buffer +
+					   req_ctx->partial_bytes,
+					   sg_rem, sg_skip);
+
+			req_ctx->partial_bytes += sg_rem;
+		}
+
+		req_ctx->digcnt += ready_bytes;
+		req_ctx->hash_flags &= ~(HASH_FLAG_UPDATE);
+	}
+
+	/* Finalize */
+	if (req_ctx->hash_flags & HASH_FLAG_FINALIZE) {
+		bool needtrim = contextsize != digestsize;
+		size_t hash_pad_len;
+		u64 digest_bits;
+		u32 oper;
+
+		if (variant == ARTPEC6_CRYPTO)
+			oper = FIELD_GET(A6_CRY_MD_OPER, req_ctx->hash_md);
+		else
+			oper = FIELD_GET(A7_CRY_MD_OPER, req_ctx->hash_md);
+
+		/* Write out the partial buffer if present */
+		if (req_ctx->partial_bytes) {
+			memcpy(req_ctx->partial_buffer_out,
+			       req_ctx->partial_buffer,
+			       req_ctx->partial_bytes);
+			error = artpec6_crypto_setup_out_descr(common,
+						req_ctx->partial_buffer_out,
+						req_ctx->partial_bytes,
+						false, true);
+			if (error)
+				return error;
+
+			req_ctx->digcnt += req_ctx->partial_bytes;
+			req_ctx->partial_bytes = 0;
+		}
+
+		if (req_ctx->hash_flags & HASH_FLAG_HMAC)
+			digest_bits = 8 * (req_ctx->digcnt + blocksize);
+		else
+			digest_bits = 8 * req_ctx->digcnt;
+
+		/* Add the hash pad */
+		hash_pad_len = create_hash_pad(oper, req_ctx->pad_buffer,
+					       req_ctx->digcnt, digest_bits);
+		error = artpec6_crypto_setup_out_descr(common,
+						      req_ctx->pad_buffer,
+						      hash_pad_len, false,
+						      true);
+		req_ctx->digcnt = 0;
+
+		if (error)
+			return error;
+
+		/* Descriptor for the final result */
+		error = artpec6_crypto_setup_in_descr(common, areq->result,
+						      digestsize,
+						      !needtrim);
+		if (error)
+			return error;
+
+		if (needtrim) {
+			/* Discard the extra context bytes for SHA-384 */
+			error = artpec6_crypto_setup_in_descr(common,
+					req_ctx->partial_buffer,
+					digestsize - contextsize, true);
+			if (error)
+				return error;
+		}
+
+	} else { /* This is not the final operation for this request */
+		if (!run_hw)
+			return ARTPEC6_CRYPTO_PREPARE_HASH_NO_START;
+
+		/* Save the result to the context */
+		error = artpec6_crypto_setup_in_descr(common,
+						      req_ctx->digeststate,
+						      contextsize, false);
+		if (error)
+			return error;
+		/* fall through */
+	}
+
+	req_ctx->hash_flags &= ~(HASH_FLAG_INIT_CTX | HASH_FLAG_UPDATE |
+				 HASH_FLAG_FINALIZE);
+
+	error = artpec6_crypto_terminate_in_descrs(common);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_terminate_out_descrs(common);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_dma_map_descs(common);
+	if (error)
+		return error;
+
+	return ARTPEC6_CRYPTO_PREPARE_HASH_START;
+}
+
+
+static int artpec6_crypto_aes_ecb_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_ECB;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_ctr_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base),
+					      0,
+					      CRYPTO_ALG_ASYNC |
+					      CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback))
+		return PTR_ERR(ctx->fallback);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CTR;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_cbc_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CBC;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_xts_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_XTS;
+
+	return 0;
+}
+
+static void artpec6_crypto_aes_exit(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static void artpec6_crypto_aes_ctr_exit(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->fallback);
+	artpec6_crypto_aes_exit(tfm);
+}
+
+static int
+artpec6_crypto_cipher_set_key(struct crypto_skcipher *cipher, const u8 *key,
+			      unsigned int keylen)
+{
+	struct artpec6_cryptotfm_context *ctx =
+		crypto_skcipher_ctx(cipher);
+
+	switch (keylen) {
+	case 16:
+	case 24:
+	case 32:
+		break;
+	default:
+		crypto_skcipher_set_flags(cipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->aes_key, key, keylen);
+	ctx->key_length = keylen;
+	return 0;
+}
+
+static int
+artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key,
+			      unsigned int keylen)
+{
+	struct artpec6_cryptotfm_context *ctx =
+		crypto_skcipher_ctx(cipher);
+	int ret;
+
+	ret = xts_check_key(&cipher->base, key, keylen);
+	if (ret)
+		return ret;
+
+	switch (keylen) {
+	case 32:
+	case 48:
+	case 64:
+		break;
+	default:
+		crypto_skcipher_set_flags(cipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->aes_key, key, keylen);
+	ctx->key_length = keylen;
+	return 0;
+}
+
+/** artpec6_crypto_process_crypto - Prepare an async block cipher crypto request
+ *
+ * @req: The asynch request to process
+ *
+ * @return 0 if the dma job was successfully prepared
+ *	  <0 on error
+ *
+ * This function sets up the PDMA descriptors for a block cipher request.
+ *
+ * The required padding is added for AES-CTR using a statically defined
+ * buffer.
+ *
+ * The PDMA descriptor list will be as follows:
+ *
+ * OUT: [KEY_MD][KEY][EOP]<CIPHER_MD>[IV]<data_0>...[data_n][AES-CTR_pad]<eop>
+ * IN:  <CIPHER_MD><data_0>...[data_n]<intr>
+ *
+ */
+static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq)
+{
+	int ret;
+	struct artpec6_crypto_walk walk;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	struct artpec6_crypto_req_common *common;
+	bool cipher_decr = false;
+	size_t cipher_klen;
+	u32 cipher_len = 0; /* Same as regk_crypto_key_128 for NULL crypto */
+	u32 oper;
+
+	req_ctx = skcipher_request_ctx(areq);
+	common = &req_ctx->common;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	if (variant == ARTPEC6_CRYPTO)
+		ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER, a6_regk_crypto_dlkey);
+	else
+		ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER, a7_regk_crypto_dlkey);
+
+	ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md,
+					     sizeof(ctx->key_md), false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key,
+					      ctx->key_length, true, false);
+	if (ret)
+		return ret;
+
+	req_ctx->cipher_md = 0;
+
+	if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS)
+		cipher_klen = ctx->key_length/2;
+	else
+		cipher_klen =  ctx->key_length;
+
+	/* Metadata */
+	switch (cipher_klen) {
+	case 16:
+		cipher_len = regk_crypto_key_128;
+		break;
+	case 24:
+		cipher_len = regk_crypto_key_192;
+		break;
+	case 32:
+		cipher_len = regk_crypto_key_256;
+		break;
+	default:
+		pr_err("%s: Invalid key length %d!\n",
+			MODULE_NAME, ctx->key_length);
+		return -EINVAL;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+		oper = regk_crypto_aes_ecb;
+		cipher_decr = req_ctx->decrypt;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		oper = regk_crypto_aes_cbc;
+		cipher_decr = req_ctx->decrypt;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_CTR:
+		oper = regk_crypto_aes_ctr;
+		cipher_decr = false;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		oper = regk_crypto_aes_xts;
+		cipher_decr = req_ctx->decrypt;
+
+		if (variant == ARTPEC6_CRYPTO)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DSEQ;
+		else
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DSEQ;
+		break;
+
+	default:
+		pr_err("%s: Invalid cipher mode %d!\n",
+			MODULE_NAME, ctx->crypto_type);
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER, oper);
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN,
+						 cipher_len);
+		if (cipher_decr)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR;
+	} else {
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER, oper);
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN,
+						 cipher_len);
+		if (cipher_decr)
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR;
+	}
+
+	ret = artpec6_crypto_setup_out_descr(common,
+					    &req_ctx->cipher_md,
+					    sizeof(req_ctx->cipher_md),
+					    false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (ret)
+		return ret;
+
+	if (iv_len) {
+		ret = artpec6_crypto_setup_out_descr(common, areq->iv, iv_len,
+						     false, false);
+		if (ret)
+			return ret;
+	}
+	/* Data out */
+	artpec6_crypto_walk_init(&walk, areq->src);
+	ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, areq->cryptlen);
+	if (ret)
+		return ret;
+
+	/* Data in */
+	artpec6_crypto_walk_init(&walk, areq->dst);
+	ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, areq->cryptlen);
+	if (ret)
+		return ret;
+
+	/* CTR-mode padding required by the HW. */
+	if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_CTR ||
+	    ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS) {
+		size_t pad = ALIGN(areq->cryptlen, AES_BLOCK_SIZE) -
+			     areq->cryptlen;
+
+		if (pad) {
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->pad_buffer,
+							     pad, false, false);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_in_descr(common,
+							    ac->pad_buffer, pad,
+							    false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	ret = artpec6_crypto_terminate_out_descrs(common);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_terminate_in_descrs(common);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_dma_map_descs(common);
+}
+
+static int artpec6_crypto_prepare_aead(struct aead_request *areq)
+{
+	size_t count;
+	int ret;
+	size_t input_length;
+	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(areq->base.tfm);
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq);
+	struct crypto_aead *cipher = crypto_aead_reqtfm(areq);
+	struct artpec6_crypto_req_common *common = &req_ctx->common;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	u32 md_cipher_len;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	/* Key */
+	if (variant == ARTPEC6_CRYPTO) {
+		ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER,
+					 a6_regk_crypto_dlkey);
+	} else {
+		ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER,
+					 a7_regk_crypto_dlkey);
+	}
+	ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md,
+					     sizeof(ctx->key_md), false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key,
+					     ctx->key_length, true, false);
+	if (ret)
+		return ret;
+
+	req_ctx->cipher_md = 0;
+
+	switch (ctx->key_length) {
+	case 16:
+		md_cipher_len = regk_crypto_key_128;
+		break;
+	case 24:
+		md_cipher_len = regk_crypto_key_192;
+		break;
+	case 32:
+		md_cipher_len = regk_crypto_key_256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER,
+						 regk_crypto_aes_gcm);
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN,
+						 md_cipher_len);
+		if (req_ctx->decrypt)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR;
+	} else {
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER,
+						 regk_crypto_aes_gcm);
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN,
+						 md_cipher_len);
+		if (req_ctx->decrypt)
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR;
+	}
+
+	ret = artpec6_crypto_setup_out_descr(common,
+					    (void *) &req_ctx->cipher_md,
+					    sizeof(req_ctx->cipher_md), false,
+					    false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (ret)
+		return ret;
+
+	/* For the decryption, cryptlen includes the tag. */
+	input_length = areq->cryptlen;
+	if (req_ctx->decrypt)
+		input_length -= AES_BLOCK_SIZE;
+
+	/* Prepare the context buffer */
+	req_ctx->hw_ctx.aad_length_bits =
+		__cpu_to_be64(8*areq->assoclen);
+
+	req_ctx->hw_ctx.text_length_bits =
+		__cpu_to_be64(8*input_length);
+
+	memcpy(req_ctx->hw_ctx.J0, areq->iv, crypto_aead_ivsize(cipher));
+	// The HW omits the initial increment of the counter field.
+	crypto_inc(req_ctx->hw_ctx.J0+12, 4);
+
+	ret = artpec6_crypto_setup_out_descr(common, &req_ctx->hw_ctx,
+		sizeof(struct artpec6_crypto_aead_hw_ctx), false, false);
+	if (ret)
+		return ret;
+
+	{
+		struct artpec6_crypto_walk walk;
+
+		artpec6_crypto_walk_init(&walk, areq->src);
+
+		/* Associated data */
+		count = areq->assoclen;
+		ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count);
+		if (ret)
+			return ret;
+
+		if (!IS_ALIGNED(areq->assoclen, 16)) {
+			size_t assoc_pad = 16 - (areq->assoclen % 16);
+			/* The HW mandates zero padding here */
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->zero_buffer,
+							     assoc_pad, false,
+							     false);
+			if (ret)
+				return ret;
+		}
+
+		/* Data to crypto */
+		count = input_length;
+		ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count);
+		if (ret)
+			return ret;
+
+		if (!IS_ALIGNED(input_length, 16)) {
+			size_t crypto_pad = 16 - (input_length % 16);
+			/* The HW mandates zero padding here */
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->zero_buffer,
+							     crypto_pad,
+							     false,
+							     false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Data from crypto */
+	{
+		struct artpec6_crypto_walk walk;
+		size_t output_len = areq->cryptlen;
+
+		if (req_ctx->decrypt)
+			output_len -= AES_BLOCK_SIZE;
+
+		artpec6_crypto_walk_init(&walk, areq->dst);
+
+		/* skip associated data in the output */
+		count = artpec6_crypto_walk_advance(&walk, areq->assoclen);
+		if (count)
+			return -EINVAL;
+
+		count = output_len;
+		ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, count);
+		if (ret)
+			return ret;
+
+		/* Put padding between the cryptotext and the auth tag */
+		if (!IS_ALIGNED(output_len, 16)) {
+			size_t crypto_pad = 16 - (output_len % 16);
+
+			ret = artpec6_crypto_setup_in_descr(common,
+							    ac->pad_buffer,
+							    crypto_pad, false);
+			if (ret)
+				return ret;
+		}
+
+		/* The authentication tag shall follow immediately after
+		 * the output ciphertext. For decryption it is put in a context
+		 * buffer for later compare against the input tag.
+		 */
+		count = AES_BLOCK_SIZE;
+
+		if (req_ctx->decrypt) {
+			ret = artpec6_crypto_setup_in_descr(common,
+				req_ctx->decryption_tag, count, false);
+			if (ret)
+				return ret;
+
+		} else {
+			ret = artpec6_crypto_setup_sg_descrs_in(common, &walk,
+								count);
+			if (ret)
+				return ret;
+		}
+
+	}
+
+	ret = artpec6_crypto_terminate_in_descrs(common);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_terminate_out_descrs(common);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_dma_map_descs(common);
+}
+
+static void artpec6_crypto_process_queue(struct artpec6_crypto *ac)
+{
+	struct artpec6_crypto_req_common *req;
+
+	while (!list_empty(&ac->queue) && !artpec6_crypto_busy()) {
+		req = list_first_entry(&ac->queue,
+				       struct artpec6_crypto_req_common,
+				       list);
+		list_move_tail(&req->list, &ac->pending);
+		artpec6_crypto_start_dma(req);
+
+		req->req->complete(req->req, -EINPROGRESS);
+	}
+
+	/*
+	 * In some cases, the hardware can raise an in_eop_flush interrupt
+	 * before actually updating the status, so we have an timer which will
+	 * recheck the status on timeout.  Since the cases are expected to be
+	 * very rare, we use a relatively large timeout value.  There should be
+	 * no noticeable negative effect if we timeout spuriously.
+	 */
+	if (ac->pending_count)
+		mod_timer(&ac->timer, jiffies + msecs_to_jiffies(100));
+	else
+		del_timer(&ac->timer);
+}
+
+static void artpec6_crypto_timeout(unsigned long data)
+{
+	struct artpec6_crypto *ac = (struct artpec6_crypto *) data;
+
+	dev_info_ratelimited(artpec6_crypto_dev, "timeout\n");
+
+	tasklet_schedule(&ac->task);
+}
+
+static void artpec6_crypto_task(unsigned long data)
+{
+	struct artpec6_crypto *ac = (struct artpec6_crypto *)data;
+	struct artpec6_crypto_req_common *req;
+	struct artpec6_crypto_req_common *n;
+
+	if (list_empty(&ac->pending)) {
+		pr_debug("Spurious IRQ\n");
+		return;
+	}
+
+	spin_lock_bh(&ac->queue_lock);
+
+	list_for_each_entry_safe(req, n, &ac->pending, list) {
+		struct artpec6_crypto_dma_descriptors *dma = req->dma;
+		u32 stat;
+
+		dma_sync_single_for_cpu(artpec6_crypto_dev, dma->stat_dma_addr,
+					sizeof(dma->stat[0]),
+					DMA_BIDIRECTIONAL);
+
+		stat = req->dma->stat[req->dma->in_cnt-1];
+
+		/* A non-zero final status descriptor indicates
+		 * this job has finished.
+		 */
+		pr_debug("Request %p status is %X\n", req, stat);
+		if (!stat)
+			break;
+
+		/* Allow testing of timeout handling with fault injection */
+#ifdef CONFIG_FAULT_INJECTION
+		if (should_fail(&artpec6_crypto_fail_status_read, 1))
+			continue;
+#endif
+
+		pr_debug("Completing request %p\n", req);
+
+		list_del(&req->list);
+
+		artpec6_crypto_dma_unmap_all(req);
+		artpec6_crypto_copy_bounce_buffers(req);
+
+		ac->pending_count--;
+		artpec6_crypto_common_destroy(req);
+		req->complete(req->req);
+	}
+
+	artpec6_crypto_process_queue(ac);
+
+	spin_unlock_bh(&ac->queue_lock);
+}
+
+static void artpec6_crypto_complete_crypto(struct crypto_async_request *req)
+{
+	req->complete(req, 0);
+}
+
+static void
+artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req)
+{
+	struct skcipher_request *cipher_req = container_of(req,
+		struct skcipher_request, base);
+
+	scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src,
+				 cipher_req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	req->complete(req, 0);
+}
+
+static void
+artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req)
+{
+	struct skcipher_request *cipher_req = container_of(req,
+		struct skcipher_request, base);
+
+	scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst,
+				 cipher_req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	req->complete(req, 0);
+}
+
+static void artpec6_crypto_complete_aead(struct crypto_async_request *req)
+{
+	int result = 0;
+
+	/* Verify GCM hashtag. */
+	struct aead_request *areq = container_of(req,
+		struct aead_request, base);
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq);
+
+	if (req_ctx->decrypt) {
+		u8 input_tag[AES_BLOCK_SIZE];
+
+		sg_pcopy_to_buffer(areq->src,
+				   sg_nents(areq->src),
+				   input_tag,
+				   AES_BLOCK_SIZE,
+				   areq->assoclen + areq->cryptlen -
+				   AES_BLOCK_SIZE);
+
+		if (memcmp(req_ctx->decryption_tag,
+			   input_tag,
+			   AES_BLOCK_SIZE)) {
+			pr_debug("***EBADMSG:\n");
+			print_hex_dump_debug("ref:", DUMP_PREFIX_ADDRESS, 32, 1,
+					     input_tag, AES_BLOCK_SIZE, true);
+			print_hex_dump_debug("out:", DUMP_PREFIX_ADDRESS, 32, 1,
+					     req_ctx->decryption_tag,
+					     AES_BLOCK_SIZE, true);
+
+			result = -EBADMSG;
+		}
+	}
+
+	req->complete(req, result);
+}
+
+static void artpec6_crypto_complete_hash(struct crypto_async_request *req)
+{
+	req->complete(req, 0);
+}
+
+
+/*------------------- Hash functions -----------------------------------------*/
+static int
+artpec6_crypto_hash_set_key(struct crypto_ahash *tfm,
+		    const u8 *key, unsigned int keylen)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(&tfm->base);
+	size_t blocksize;
+	int ret;
+
+	if (!keylen) {
+		pr_err("Invalid length (%d) of HMAC key\n",
+			keylen);
+		return -EINVAL;
+	}
+
+	memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key));
+
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	if (keylen > blocksize) {
+		SHASH_DESC_ON_STACK(hdesc, tfm_ctx->child_hash);
+
+		hdesc->tfm = tfm_ctx->child_hash;
+		hdesc->flags = crypto_ahash_get_flags(tfm) &
+			       CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		tfm_ctx->hmac_key_length = blocksize;
+		ret = crypto_shash_digest(hdesc, key, keylen,
+					  tfm_ctx->hmac_key);
+		if (ret)
+			return ret;
+
+	} else {
+		memcpy(tfm_ctx->hmac_key, key, keylen);
+		tfm_ctx->hmac_key_length = keylen;
+	}
+
+	return 0;
+}
+
+static int
+artpec6_crypto_init_hash(struct ahash_request *req, u8 type, int hmac)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+	u32 oper;
+
+	memset(req_ctx, 0, sizeof(*req_ctx));
+
+	req_ctx->hash_flags = HASH_FLAG_INIT_CTX;
+	if (hmac)
+		req_ctx->hash_flags |= (HASH_FLAG_HMAC | HASH_FLAG_UPDATE_KEY);
+
+	switch (type) {
+	case ARTPEC6_CRYPTO_HASH_SHA1:
+		oper = hmac ? regk_crypto_hmac_sha1 : regk_crypto_sha1;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA256:
+		oper = hmac ? regk_crypto_hmac_sha256 : regk_crypto_sha256;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA384:
+		oper = hmac ? regk_crypto_hmac_sha384 : regk_crypto_sha384;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA512:
+		oper = hmac ? regk_crypto_hmac_sha512 : regk_crypto_sha512;
+		break;
+
+	default:
+		pr_err("%s: Unsupported hash type 0x%x\n", MODULE_NAME, type);
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO)
+		req_ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, oper);
+	else
+		req_ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, oper);
+
+	return 0;
+}
+
+static int artpec6_crypto_prepare_submit_hash(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+	int ret;
+
+	if (!req_ctx->common.dma) {
+		ret = artpec6_crypto_common_init(&req_ctx->common,
+					  &req->base,
+					  artpec6_crypto_complete_hash,
+					  NULL, 0);
+
+		if (ret)
+			return ret;
+	}
+
+	ret = artpec6_crypto_prepare_hash(req);
+	switch (ret) {
+	case ARTPEC6_CRYPTO_PREPARE_HASH_START:
+		ret = artpec6_crypto_submit(&req_ctx->common);
+		break;
+
+	case ARTPEC6_CRYPTO_PREPARE_HASH_NO_START:
+		ret = 0;
+		/* Fallthrough */
+
+	default:
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		break;
+	}
+
+	return ret;
+}
+
+static int artpec6_crypto_hash_final(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	req_ctx->hash_flags |= HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hash_update(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha1_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0);
+}
+
+static int artpec6_crypto_sha1_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0);
+
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha256_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0);
+}
+
+static int artpec6_crypto_sha256_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int __maybe_unused artpec6_crypto_sha384_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0);
+}
+
+static int __maybe_unused
+artpec6_crypto_sha384_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha512_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0);
+}
+
+static int artpec6_crypto_sha512_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hmac_sha256_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1);
+}
+
+static int __maybe_unused
+artpec6_crypto_hmac_sha384_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1);
+}
+
+static int artpec6_crypto_hmac_sha512_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1);
+}
+
+static int artpec6_crypto_hmac_sha256_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int __maybe_unused
+artpec6_crypto_hmac_sha384_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hmac_sha512_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_ahash_init_common(struct crypto_tfm *tfm,
+				    const char *base_hash_name)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct artpec6_hash_request_context));
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	if (base_hash_name) {
+		struct crypto_shash *child;
+
+		child = crypto_alloc_shash(base_hash_name, 0,
+					   CRYPTO_ALG_NEED_FALLBACK);
+
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		tfm_ctx->child_hash = child;
+	}
+
+	return 0;
+}
+
+static int artpec6_crypto_ahash_init(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, NULL);
+}
+
+static int artpec6_crypto_ahash_init_hmac_sha256(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha256");
+}
+
+static int __maybe_unused
+artpec6_crypto_ahash_init_hmac_sha384(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha384");
+}
+
+static int artpec6_crypto_ahash_init_hmac_sha512(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha512");
+}
+
+static void artpec6_crypto_ahash_exit(struct crypto_tfm *tfm)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm);
+
+	if (tfm_ctx->child_hash)
+		crypto_free_shash(tfm_ctx->child_hash);
+
+	memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key));
+	tfm_ctx->hmac_key_length = 0;
+}
+
+static int artpec6_crypto_hash_export(struct ahash_request *req, void *out)
+{
+	const struct artpec6_hash_request_context *ctx = ahash_request_ctx(req);
+	struct artpec6_hash_export_state *state = out;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+
+	BUILD_BUG_ON(sizeof(state->partial_buffer) !=
+		     sizeof(ctx->partial_buffer));
+	BUILD_BUG_ON(sizeof(state->digeststate) != sizeof(ctx->digeststate));
+
+	state->digcnt = ctx->digcnt;
+	state->partial_bytes = ctx->partial_bytes;
+	state->hash_flags = ctx->hash_flags;
+
+	if (variant == ARTPEC6_CRYPTO)
+		state->oper = FIELD_GET(A6_CRY_MD_OPER, ctx->hash_md);
+	else
+		state->oper = FIELD_GET(A7_CRY_MD_OPER, ctx->hash_md);
+
+	memcpy(state->partial_buffer, ctx->partial_buffer,
+	       sizeof(state->partial_buffer));
+	memcpy(state->digeststate, ctx->digeststate,
+	       sizeof(state->digeststate));
+
+	return 0;
+}
+
+static int artpec6_crypto_hash_import(struct ahash_request *req, const void *in)
+{
+	struct artpec6_hash_request_context *ctx = ahash_request_ctx(req);
+	const struct artpec6_hash_export_state *state = in;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->digcnt = state->digcnt;
+	ctx->partial_bytes = state->partial_bytes;
+	ctx->hash_flags = state->hash_flags;
+
+	if (variant == ARTPEC6_CRYPTO)
+		ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, state->oper);
+	else
+		ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, state->oper);
+
+	memcpy(ctx->partial_buffer, state->partial_buffer,
+	       sizeof(state->partial_buffer));
+	memcpy(ctx->digeststate, state->digeststate,
+	       sizeof(state->digeststate));
+
+	return 0;
+}
+
+static int init_crypto_hw(struct artpec6_crypto *ac)
+{
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	u32 out_descr_buf_size;
+	u32 out_data_buf_size;
+	u32 in_data_buf_size;
+	u32 in_descr_buf_size;
+	u32 in_stat_buf_size;
+	u32 in, out;
+
+	/*
+	 * The PDMA unit contains 1984 bytes of internal memory for the OUT
+	 * channels and 1024 bytes for the IN channel. This is an elastic
+	 * memory used to internally store the descriptors and data. The values
+	 * ares specified in 64 byte incremements.  Trustzone buffers are not
+	 * used at this stage.
+	 */
+	out_data_buf_size = 16;  /* 1024 bytes for data */
+	out_descr_buf_size = 15; /* 960 bytes for descriptors */
+	in_data_buf_size = 8;    /* 512 bytes for data */
+	in_descr_buf_size = 4;   /* 256 bytes for descriptors */
+	in_stat_buf_size = 4;   /* 256 bytes for stat descrs */
+
+	BUILD_BUG_ON_MSG((out_data_buf_size
+				+ out_descr_buf_size) * 64 > 1984,
+			  "Invalid OUT configuration");
+
+	BUILD_BUG_ON_MSG((in_data_buf_size
+				+ in_descr_buf_size
+				+ in_stat_buf_size) * 64 > 1024,
+			  "Invalid IN configuration");
+
+	in = FIELD_PREP(PDMA_IN_BUF_CFG_DATA_BUF_SIZE, in_data_buf_size) |
+	     FIELD_PREP(PDMA_IN_BUF_CFG_DESCR_BUF_SIZE, in_descr_buf_size) |
+	     FIELD_PREP(PDMA_IN_BUF_CFG_STAT_BUF_SIZE, in_stat_buf_size);
+
+	out = FIELD_PREP(PDMA_OUT_BUF_CFG_DATA_BUF_SIZE, out_data_buf_size) |
+	      FIELD_PREP(PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE, out_descr_buf_size);
+
+	writel_relaxed(out, base + PDMA_OUT_BUF_CFG);
+	writel_relaxed(PDMA_OUT_CFG_EN, base + PDMA_OUT_CFG);
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(in, base + A6_PDMA_IN_BUF_CFG);
+		writel_relaxed(PDMA_IN_CFG_EN, base + A6_PDMA_IN_CFG);
+		writel_relaxed(A6_PDMA_INTR_MASK_IN_DATA |
+			       A6_PDMA_INTR_MASK_IN_EOP_FLUSH,
+			       base + A6_PDMA_INTR_MASK);
+	} else {
+		writel_relaxed(in, base + A7_PDMA_IN_BUF_CFG);
+		writel_relaxed(PDMA_IN_CFG_EN, base + A7_PDMA_IN_CFG);
+		writel_relaxed(A7_PDMA_INTR_MASK_IN_DATA |
+			       A7_PDMA_INTR_MASK_IN_EOP_FLUSH,
+			       base + A7_PDMA_INTR_MASK);
+	}
+
+	return 0;
+}
+
+static void artpec6_crypto_disable_hw(struct artpec6_crypto *ac)
+{
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(A6_PDMA_IN_CMD_STOP, base + A6_PDMA_IN_CMD);
+		writel_relaxed(0, base + A6_PDMA_IN_CFG);
+		writel_relaxed(A6_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD);
+	} else {
+		writel_relaxed(A7_PDMA_IN_CMD_STOP, base + A7_PDMA_IN_CMD);
+		writel_relaxed(0, base + A7_PDMA_IN_CFG);
+		writel_relaxed(A7_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD);
+	}
+
+	writel_relaxed(0, base + PDMA_OUT_CFG);
+
+}
+
+static irqreturn_t artpec6_crypto_irq(int irq, void *dev_id)
+{
+	struct artpec6_crypto *ac = dev_id;
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	u32 mask_in_data, mask_in_eop_flush;
+	u32 in_cmd_flush_stat, in_cmd_reg;
+	u32 ack_intr_reg;
+	u32 ack = 0;
+	u32 intr;
+
+	if (variant == ARTPEC6_CRYPTO) {
+		intr = readl_relaxed(base + A6_PDMA_MASKED_INTR);
+		mask_in_data = A6_PDMA_INTR_MASK_IN_DATA;
+		mask_in_eop_flush = A6_PDMA_INTR_MASK_IN_EOP_FLUSH;
+		in_cmd_flush_stat = A6_PDMA_IN_CMD_FLUSH_STAT;
+		in_cmd_reg = A6_PDMA_IN_CMD;
+		ack_intr_reg = A6_PDMA_ACK_INTR;
+	} else {
+		intr = readl_relaxed(base + A7_PDMA_MASKED_INTR);
+		mask_in_data = A7_PDMA_INTR_MASK_IN_DATA;
+		mask_in_eop_flush = A7_PDMA_INTR_MASK_IN_EOP_FLUSH;
+		in_cmd_flush_stat = A7_PDMA_IN_CMD_FLUSH_STAT;
+		in_cmd_reg = A7_PDMA_IN_CMD;
+		ack_intr_reg = A7_PDMA_ACK_INTR;
+	}
+
+	/* We get two interrupt notifications from each job.
+	 * The in_data means all data was sent to memory and then
+	 * we request a status flush command to write the per-job
+	 * status to its status vector. This ensures that the
+	 * tasklet can detect exactly how many submitted jobs
+	 * that have finished.
+	 */
+	if (intr & mask_in_data)
+		ack |= mask_in_data;
+
+	if (intr & mask_in_eop_flush)
+		ack |= mask_in_eop_flush;
+	else
+		writel_relaxed(in_cmd_flush_stat, base + in_cmd_reg);
+
+	writel_relaxed(ack, base + ack_intr_reg);
+
+	if (intr & mask_in_eop_flush)
+		tasklet_schedule(&ac->task);
+
+	return IRQ_HANDLED;
+}
+
+/*------------------- Algorithm definitions ----------------------------------*/
+
+/* Hashes */
+static struct ahash_alg hash_algos[] = {
+	/* SHA-1 */
+	{
+		.init = artpec6_crypto_sha1_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha1_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA1_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha1",
+			.cra_driver_name = "artpec-sha1",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA1_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* SHA-256 */
+	{
+		.init = artpec6_crypto_sha256_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha256_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA256_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha256",
+			.cra_driver_name = "artpec-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-256 */
+	{
+		.init = artpec6_crypto_hmac_sha256_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha256_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA256_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha256)",
+			.cra_driver_name = "artpec-hmac-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha256,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+};
+
+static struct ahash_alg artpec7_hash_algos[] = {
+	/* SHA-384 */
+	{
+		.init = artpec6_crypto_sha384_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha384_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA384_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha384",
+			.cra_driver_name = "artpec-sha384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-384 */
+	{
+		.init = artpec6_crypto_hmac_sha384_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha384_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA384_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha384)",
+			.cra_driver_name = "artpec-hmac-sha384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha384,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* SHA-512 */
+	{
+		.init = artpec6_crypto_sha512_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha512_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA512_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha512",
+			.cra_driver_name = "artpec-sha512",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-512 */
+	{
+		.init = artpec6_crypto_hmac_sha512_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha512_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA512_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha512)",
+			.cra_driver_name = "artpec-hmac-sha512",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha512,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+};
+
+/* Crypto */
+static struct skcipher_alg crypto_algos[] = {
+	/* AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "artpec6-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_ecb_init,
+		.exit = artpec6_crypto_aes_exit,
+	},
+	/* AES - CTR */
+	{
+		.base = {
+			.cra_name = "ctr(aes)",
+			.cra_driver_name = "artpec6-ctr-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_ctr_encrypt,
+		.decrypt = artpec6_crypto_ctr_decrypt,
+		.init = artpec6_crypto_aes_ctr_init,
+		.exit = artpec6_crypto_aes_ctr_exit,
+	},
+	/* AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "artpec6-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_cbc_init,
+		.exit = artpec6_crypto_aes_exit
+	},
+	/* AES - XTS */
+	{
+		.base = {
+			.cra_name = "xts(aes)",
+			.cra_driver_name = "artpec6-xts-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = 2*AES_MIN_KEY_SIZE,
+		.max_keysize = 2*AES_MAX_KEY_SIZE,
+		.ivsize = 16,
+		.setkey = artpec6_crypto_xts_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_xts_init,
+		.exit = artpec6_crypto_aes_exit,
+	},
+};
+
+static struct aead_alg aead_algos[] = {
+	{
+		.init   = artpec6_crypto_aead_init,
+		.setkey = artpec6_crypto_aead_set_key,
+		.encrypt = artpec6_crypto_aead_encrypt,
+		.decrypt = artpec6_crypto_aead_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+
+		.base = {
+			.cra_name = "gcm(aes)",
+			.cra_driver_name = "artpec-gcm-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+	}
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+struct dbgfs_u32 {
+	char *name;
+	mode_t mode;
+	u32 *flag;
+	char *desc;
+};
+
+static void artpec6_crypto_init_debugfs(void)
+{
+	dbgfs_root = debugfs_create_dir("artpec6_crypto", NULL);
+
+	if (!dbgfs_root || IS_ERR(dbgfs_root)) {
+		dbgfs_root = NULL;
+		pr_err("%s: Could not initialise debugfs!\n", MODULE_NAME);
+		return;
+	}
+
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("fail_status_read", dbgfs_root,
+				  &artpec6_crypto_fail_status_read);
+
+	fault_create_debugfs_attr("fail_dma_array_full", dbgfs_root,
+				  &artpec6_crypto_fail_dma_array_full);
+#endif
+}
+
+static void artpec6_crypto_free_debugfs(void)
+{
+	if (!dbgfs_root)
+		return;
+
+	debugfs_remove_recursive(dbgfs_root);
+	dbgfs_root = NULL;
+}
+#endif
+
+static const struct of_device_id artpec6_crypto_of_match[] = {
+	{ .compatible = "axis,artpec6-crypto", .data = (void *)ARTPEC6_CRYPTO },
+	{ .compatible = "axis,artpec7-crypto", .data = (void *)ARTPEC7_CRYPTO },
+	{}
+};
+MODULE_DEVICE_TABLE(of, artpec6_crypto_of_match);
+
+static int artpec6_crypto_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	enum artpec6_crypto_variant variant;
+	struct artpec6_crypto *ac;
+	struct device *dev = &pdev->dev;
+	void __iomem *base;
+	struct resource *res;
+	int irq;
+	int err;
+
+	if (artpec6_crypto_dev)
+		return -ENODEV;
+
+	match = of_match_node(artpec6_crypto_of_match, dev->of_node);
+	if (!match)
+		return -EINVAL;
+
+	variant = (enum artpec6_crypto_variant)match->data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+
+	ac = devm_kzalloc(&pdev->dev, sizeof(struct artpec6_crypto),
+			  GFP_KERNEL);
+	if (!ac)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ac);
+	ac->variant = variant;
+
+	spin_lock_init(&ac->queue_lock);
+	INIT_LIST_HEAD(&ac->queue);
+	INIT_LIST_HEAD(&ac->pending);
+	setup_timer(&ac->timer, artpec6_crypto_timeout, (unsigned long) ac);
+
+	ac->base = base;
+
+	ac->dma_cache = kmem_cache_create("artpec6_crypto_dma",
+		sizeof(struct artpec6_crypto_dma_descriptors),
+		64,
+		0,
+		NULL);
+	if (!ac->dma_cache)
+		return -ENOMEM;
+
+#ifdef CONFIG_DEBUG_FS
+	artpec6_crypto_init_debugfs();
+#endif
+
+	tasklet_init(&ac->task, artpec6_crypto_task,
+		     (unsigned long)ac);
+
+	ac->pad_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX,
+				      GFP_KERNEL);
+	if (!ac->pad_buffer)
+		return -ENOMEM;
+	ac->pad_buffer = PTR_ALIGN(ac->pad_buffer, ARTPEC_CACHE_LINE_MAX);
+
+	ac->zero_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX,
+				      GFP_KERNEL);
+	if (!ac->zero_buffer)
+		return -ENOMEM;
+	ac->zero_buffer = PTR_ALIGN(ac->zero_buffer, ARTPEC_CACHE_LINE_MAX);
+
+	err = init_crypto_hw(ac);
+	if (err)
+		goto free_cache;
+
+	err = devm_request_irq(&pdev->dev, irq, artpec6_crypto_irq, 0,
+			       "artpec6-crypto", ac);
+	if (err)
+		goto disable_hw;
+
+	artpec6_crypto_dev = &pdev->dev;
+
+	err = crypto_register_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+	if (err) {
+		dev_err(dev, "Failed to register ahashes\n");
+		goto disable_hw;
+	}
+
+	if (variant != ARTPEC6_CRYPTO) {
+		err = crypto_register_ahashes(artpec7_hash_algos,
+					      ARRAY_SIZE(artpec7_hash_algos));
+		if (err) {
+			dev_err(dev, "Failed to register ahashes\n");
+			goto unregister_ahashes;
+		}
+	}
+
+	err = crypto_register_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+	if (err) {
+		dev_err(dev, "Failed to register ciphers\n");
+		goto unregister_a7_ahashes;
+	}
+
+	err = crypto_register_aeads(aead_algos, ARRAY_SIZE(aead_algos));
+	if (err) {
+		dev_err(dev, "Failed to register aeads\n");
+		goto unregister_algs;
+	}
+
+	return 0;
+
+unregister_algs:
+	crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+unregister_a7_ahashes:
+	if (variant != ARTPEC6_CRYPTO)
+		crypto_unregister_ahashes(artpec7_hash_algos,
+					  ARRAY_SIZE(artpec7_hash_algos));
+unregister_ahashes:
+	crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+disable_hw:
+	artpec6_crypto_disable_hw(ac);
+free_cache:
+	kmem_cache_destroy(ac->dma_cache);
+	return err;
+}
+
+static int artpec6_crypto_remove(struct platform_device *pdev)
+{
+	struct artpec6_crypto *ac = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+	if (ac->variant != ARTPEC6_CRYPTO)
+		crypto_unregister_ahashes(artpec7_hash_algos,
+					  ARRAY_SIZE(artpec7_hash_algos));
+	crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+	crypto_unregister_aeads(aead_algos, ARRAY_SIZE(aead_algos));
+
+	tasklet_disable(&ac->task);
+	devm_free_irq(&pdev->dev, irq, ac);
+	tasklet_kill(&ac->task);
+	del_timer_sync(&ac->timer);
+
+	artpec6_crypto_disable_hw(ac);
+
+	kmem_cache_destroy(ac->dma_cache);
+#ifdef CONFIG_DEBUG_FS
+	artpec6_crypto_free_debugfs();
+#endif
+	return 0;
+}
+
+static struct platform_driver artpec6_crypto_driver = {
+	.probe   = artpec6_crypto_probe,
+	.remove  = artpec6_crypto_remove,
+	.driver  = {
+		.name  = "artpec6-crypto",
+		.owner = THIS_MODULE,
+		.of_match_table = artpec6_crypto_of_match,
+	},
+};
+
+module_platform_driver(artpec6_crypto_driver);
+
+MODULE_AUTHOR("Axis Communications AB");
+MODULE_DESCRIPTION("ARTPEC-6 Crypto driver");
+MODULE_LICENSE("GPL");

From f93ed02818512f6d1b73d9a19e25526847019083 Mon Sep 17 00:00:00 2001
From: Lars Persson <lars.persson@axis.com>
Date: Thu, 10 Aug 2017 14:53:54 +0200
Subject: [PATCH 110/116] MAINTAINERS: Add ARTPEC crypto maintainer

Assign the Axis kernel team as maintainer for crypto drivers under
drivers/crypto/axis.

Signed-off-by: Lars Persson <larper@axis.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d5b6c71e783eb..72186cf9820de 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1129,6 +1129,7 @@ L:	linux-arm-kernel@axis.com
 F:	arch/arm/mach-artpec
 F:	arch/arm/boot/dts/artpec6*
 F:	drivers/clk/axis
+F:	drivers/crypto/axis
 F:	drivers/pinctrl/pinctrl-artpec*
 F:	Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt
 

From 249cb0632570302e2c61f900806b92f3fe66783b Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sun, 13 Aug 2017 14:34:00 +0530
Subject: [PATCH 111/116] crypto: sahara - constify platform_device_id

platform_device_id are not supposed to change at runtime. All functions
working with platform_device_id provided by <linux/platform_device.h>
work with const platform_device_id. So mark the non-const structs as
const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sahara.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 03a8abad7e76a..08e7bdcaa6e31 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1375,7 +1375,7 @@ static void sahara_unregister_algs(struct sahara_dev *dev)
 			crypto_unregister_ahash(&sha_v4_algs[i]);
 }
 
-static struct platform_device_id sahara_platform_ids[] = {
+static const struct platform_device_id sahara_platform_ids[] = {
 	{ .name = "sahara-imx27" },
 	{ /* sentinel */ }
 };

From baf5b752dae2b7c84b3fa5ffb0eb41648d659c09 Mon Sep 17 00:00:00 2001
From: Corentin LABBE <clabbe.montjoie@gmail.com>
Date: Mon, 14 Aug 2017 13:58:54 +0200
Subject: [PATCH 112/116] crypto: cavium - add release_firmware to all return
 case

Two return case misses to call release_firmware() and so leak some
memory.

This patch create a fw_release label (and so a common error path)
and use it on all return case.

Detected by CoverityScan, CID#1416422 ("Resource Leak")

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/cpt/cptpf_main.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c
index 4119c40e7c4b0..34a6d8bf229ef 100644
--- a/drivers/crypto/cavium/cpt/cptpf_main.c
+++ b/drivers/crypto/cavium/cpt/cptpf_main.c
@@ -268,8 +268,10 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 	mcode = &cpt->mcode[cpt->next_mc_idx];
 	memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ);
 	mcode->code_size = ntohl(ucode->code_length) * 2;
-	if (!mcode->code_size)
-		return -EINVAL;
+	if (!mcode->code_size) {
+		ret = -EINVAL;
+		goto fw_release;
+	}
 
 	mcode->is_ae = is_ae;
 	mcode->core_mask = 0ULL;
@@ -280,7 +282,8 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 					  &mcode->phys_base, GFP_KERNEL);
 	if (!mcode->code) {
 		dev_err(dev, "Unable to allocate space for microcode");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fw_release;
 	}
 
 	memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)),
@@ -302,12 +305,14 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 	ret = do_cpt_init(cpt, mcode);
 	if (ret) {
 		dev_err(dev, "do_cpt_init failed with ret: %d\n", ret);
-		return ret;
+		goto fw_release;
 	}
 
 	dev_info(dev, "Microcode Loaded %s\n", mcode->version);
 	mcode->is_mc_valid = 1;
 	cpt->next_mc_idx++;
+
+fw_release:
 	release_firmware(fw_entry);
 
 	return ret;

From 5a7801f6634b1e2888bcb1a85bedc50e46dcd757 Mon Sep 17 00:00:00 2001
From: Zain Wang <wzz@rock-chips.com>
Date: Tue, 15 Aug 2017 15:48:15 +0800
Subject: [PATCH 113/116] crypto: rockchip - Don't dequeue the request when
 device is busy

The device can only process one request at a time. So if multiple
requests came at the same time, we can enqueue them first, and
dequeue them one by one when the device is idle.

Signed-off-by: zain wang <wzz@rock-chips.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       |  46 +++++-
 drivers/crypto/rockchip/rk3288_crypto.h       |  11 +-
 .../rockchip/rk3288_crypto_ablkcipher.c       | 118 +++++++---------
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 133 ++++++++----------
 4 files changed, 160 insertions(+), 148 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 57c37831bd424..c9d622abd90c0 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -184,15 +184,53 @@ static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int rk_crypto_enqueue(struct rk_crypto_info *dev,
+			      struct crypto_async_request *async_req)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	ret = crypto_enqueue_request(&dev->queue, async_req);
+	if (dev->busy) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return ret;
+	}
+	dev->busy = true;
+	spin_unlock_irqrestore(&dev->lock, flags);
+	tasklet_schedule(&dev->queue_task);
+
+	return ret;
+}
+
 static void rk_crypto_queue_task_cb(unsigned long data)
 {
 	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
+	struct crypto_async_request *async_req, *backlog;
+	unsigned long flags;
 	int err = 0;
 
 	dev->err = 0;
+	spin_lock_irqsave(&dev->lock, flags);
+	backlog   = crypto_get_backlog(&dev->queue);
+	async_req = crypto_dequeue_request(&dev->queue);
+
+	if (!async_req) {
+		dev->busy = false;
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	if (backlog) {
+		backlog->complete(backlog, -EINPROGRESS);
+		backlog = NULL;
+	}
+
+	dev->async_req = async_req;
 	err = dev->start(dev);
 	if (err)
-		dev->complete(dev, err);
+		dev->complete(dev->async_req, err);
 }
 
 static void rk_crypto_done_task_cb(unsigned long data)
@@ -200,13 +238,13 @@ static void rk_crypto_done_task_cb(unsigned long data)
 	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
 
 	if (dev->err) {
-		dev->complete(dev, dev->err);
+		dev->complete(dev->async_req, dev->err);
 		return;
 	}
 
 	dev->err = dev->update(dev);
 	if (dev->err)
-		dev->complete(dev, dev->err);
+		dev->complete(dev->async_req, dev->err);
 }
 
 static struct rk_crypto_tmp *rk_cipher_algs[] = {
@@ -365,6 +403,8 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	crypto_info->disable_clk = rk_crypto_disable_clk;
 	crypto_info->load_data = rk_load_data;
 	crypto_info->unload_data = rk_unload_data;
+	crypto_info->enqueue = rk_crypto_enqueue;
+	crypto_info->busy = false;
 
 	err = rk_crypto_register(crypto_info);
 	if (err) {
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 65ad1c2619490..ab6a1b4c40f0c 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -192,8 +192,7 @@ struct rk_crypto_info {
 	struct crypto_queue		queue;
 	struct tasklet_struct		queue_task;
 	struct tasklet_struct		done_task;
-	struct ablkcipher_request	*ablk_req;
-	struct ahash_request		*ahash_req;
+	struct crypto_async_request	*async_req;
 	int 				err;
 	/* device lock */
 	spinlock_t			lock;
@@ -210,18 +209,20 @@ struct rk_crypto_info {
 	size_t				nents;
 	unsigned int			total;
 	unsigned int			count;
-	u32				mode;
 	dma_addr_t			addr_in;
 	dma_addr_t			addr_out;
+	bool				busy;
 	int (*start)(struct rk_crypto_info *dev);
 	int (*update)(struct rk_crypto_info *dev);
-	void (*complete)(struct rk_crypto_info *dev, int err);
+	void (*complete)(struct crypto_async_request *base, int err);
 	int (*enable_clk)(struct rk_crypto_info *dev);
 	void (*disable_clk)(struct rk_crypto_info *dev);
 	int (*load_data)(struct rk_crypto_info *dev,
 			 struct scatterlist *sg_src,
 			 struct scatterlist *sg_dst);
 	void (*unload_data)(struct rk_crypto_info *dev);
+	int (*enqueue)(struct rk_crypto_info *dev,
+		       struct crypto_async_request *async_req);
 };
 
 /* the private variable of hash */
@@ -234,12 +235,14 @@ struct rk_ahash_ctx {
 /* the privete variable of hash for fallback */
 struct rk_ahash_rctx {
 	struct ahash_request		fallback_req;
+	u32				mode;
 };
 
 /* the private variable of cipher */
 struct rk_cipher_ctx {
 	struct rk_crypto_info		*dev;
 	unsigned int			keylen;
+	u32				mode;
 };
 
 enum alg_type {
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
index dbe78def7b650..639c15c5364b4 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
@@ -15,50 +15,19 @@
 
 #define RK_CRYPTO_DEC			BIT(0)
 
-static void rk_crypto_complete(struct rk_crypto_info *dev, int err)
+static void rk_crypto_complete(struct crypto_async_request *base, int err)
 {
-	if (dev->ablk_req->base.complete)
-		dev->ablk_req->base.complete(&dev->ablk_req->base, err);
+	if (base->complete)
+		base->complete(base, err);
 }
 
 static int rk_handle_req(struct rk_crypto_info *dev,
 			 struct ablkcipher_request *req)
 {
-	unsigned long flags;
-	struct crypto_async_request *async_req, *backlog;
-	int err;
-
 	if (!IS_ALIGNED(req->nbytes, dev->align_size))
 		return -EINVAL;
-
-	dev->left_bytes = req->nbytes;
-	dev->total = req->nbytes;
-	dev->sg_src = req->src;
-	dev->first = req->src;
-	dev->nents = sg_nents(req->src);
-	dev->sg_dst = req->dst;
-	dev->aligned = 1;
-	dev->ablk_req = req;
-
-	spin_lock_irqsave(&dev->lock, flags);
-	err = ablkcipher_enqueue_request(&dev->queue, req);
-	backlog   = crypto_get_backlog(&dev->queue);
-	async_req = crypto_dequeue_request(&dev->queue);
-	spin_unlock_irqrestore(&dev->lock, flags);
-
-	if (!async_req) {
-		dev_err(dev->dev, "async_req is NULL !!\n");
-		return err;
-	}
-	if (backlog) {
-		backlog->complete(backlog, -EINPROGRESS);
-		backlog = NULL;
-	}
-
-	dev->ablk_req = ablkcipher_request_cast(async_req);
-
-	tasklet_schedule(&dev->queue_task);
-	return err;
+	else
+		return dev->enqueue(dev, &req->base);
 }
 
 static int rk_aes_setkey(struct crypto_ablkcipher *cipher,
@@ -108,7 +77,7 @@ static int rk_aes_ecb_encrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_AES_ECB_MODE;
+	ctx->mode = RK_CRYPTO_AES_ECB_MODE;
 	return rk_handle_req(dev, req);
 }
 
@@ -118,7 +87,7 @@ static int rk_aes_ecb_decrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
+	ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -128,7 +97,7 @@ static int rk_aes_cbc_encrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_AES_CBC_MODE;
+	ctx->mode = RK_CRYPTO_AES_CBC_MODE;
 	return rk_handle_req(dev, req);
 }
 
@@ -138,7 +107,7 @@ static int rk_aes_cbc_decrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
+	ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -148,7 +117,7 @@ static int rk_des_ecb_encrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = 0;
+	ctx->mode = 0;
 	return rk_handle_req(dev, req);
 }
 
@@ -158,7 +127,7 @@ static int rk_des_ecb_decrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_DEC;
+	ctx->mode = RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -168,7 +137,7 @@ static int rk_des_cbc_encrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
+	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
 	return rk_handle_req(dev, req);
 }
 
@@ -178,7 +147,7 @@ static int rk_des_cbc_decrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
+	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -188,7 +157,7 @@ static int rk_des3_ede_ecb_encrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_TDES_SELECT;
+	ctx->mode = RK_CRYPTO_TDES_SELECT;
 	return rk_handle_req(dev, req);
 }
 
@@ -198,7 +167,7 @@ static int rk_des3_ede_ecb_decrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
+	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -208,7 +177,7 @@ static int rk_des3_ede_cbc_encrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
+	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
 	return rk_handle_req(dev, req);
 }
 
@@ -218,15 +187,16 @@ static int rk_des3_ede_cbc_decrypt(struct ablkcipher_request *req)
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	dev->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
+	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
 		    RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
 static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 {
-	struct crypto_ablkcipher *cipher =
-		crypto_ablkcipher_reqtfm(dev->ablk_req);
+	struct ablkcipher_request *req =
+		ablkcipher_request_cast(dev->async_req);
+	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req);
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher);
 	u32 ivsize, block, conf_reg = 0;
@@ -235,25 +205,23 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 	ivsize = crypto_ablkcipher_ivsize(cipher);
 
 	if (block == DES_BLOCK_SIZE) {
-		dev->mode |= RK_CRYPTO_TDES_FIFO_MODE |
+		ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
 			     RK_CRYPTO_TDES_BYTESWAP_KEY |
 			     RK_CRYPTO_TDES_BYTESWAP_IV;
-		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, dev->mode);
-		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0,
-			    dev->ablk_req->info, ivsize);
+		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode);
+		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->info, ivsize);
 		conf_reg = RK_CRYPTO_DESSEL;
 	} else {
-		dev->mode |= RK_CRYPTO_AES_FIFO_MODE |
+		ctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
 			     RK_CRYPTO_AES_KEY_CHANGE |
 			     RK_CRYPTO_AES_BYTESWAP_KEY |
 			     RK_CRYPTO_AES_BYTESWAP_IV;
 		if (ctx->keylen == AES_KEYSIZE_192)
-			dev->mode |= RK_CRYPTO_AES_192BIT_key;
+			ctx->mode |= RK_CRYPTO_AES_192BIT_key;
 		else if (ctx->keylen == AES_KEYSIZE_256)
-			dev->mode |= RK_CRYPTO_AES_256BIT_key;
-		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, dev->mode);
-		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0,
-			    dev->ablk_req->info, ivsize);
+			ctx->mode |= RK_CRYPTO_AES_256BIT_key;
+		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode);
+		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->info, ivsize);
 	}
 	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
 		    RK_CRYPTO_BYTESWAP_BRFIFO;
@@ -283,8 +251,18 @@ static int rk_set_data_start(struct rk_crypto_info *dev)
 
 static int rk_ablk_start(struct rk_crypto_info *dev)
 {
+	struct ablkcipher_request *req =
+		ablkcipher_request_cast(dev->async_req);
 	unsigned long flags;
-	int err;
+	int err = 0;
+
+	dev->left_bytes = req->nbytes;
+	dev->total = req->nbytes;
+	dev->sg_src = req->src;
+	dev->first = req->src;
+	dev->nents = sg_nents(req->src);
+	dev->sg_dst = req->dst;
+	dev->aligned = 1;
 
 	spin_lock_irqsave(&dev->lock, flags);
 	rk_ablk_hw_init(dev);
@@ -295,15 +273,16 @@ static int rk_ablk_start(struct rk_crypto_info *dev)
 
 static void rk_iv_copyback(struct rk_crypto_info *dev)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(dev->ablk_req);
+	struct ablkcipher_request *req =
+		ablkcipher_request_cast(dev->async_req);
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	u32 ivsize = crypto_ablkcipher_ivsize(tfm);
 
 	if (ivsize == DES_BLOCK_SIZE)
-		memcpy_fromio(dev->ablk_req->info,
-			      dev->reg + RK_CRYPTO_TDES_IV_0, ivsize);
+		memcpy_fromio(req->info, dev->reg + RK_CRYPTO_TDES_IV_0,
+			      ivsize);
 	else if (ivsize == AES_BLOCK_SIZE)
-		memcpy_fromio(dev->ablk_req->info,
-			      dev->reg + RK_CRYPTO_AES_IV_0, ivsize);
+		memcpy_fromio(req->info, dev->reg + RK_CRYPTO_AES_IV_0, ivsize);
 }
 
 /* return:
@@ -313,10 +292,12 @@ static void rk_iv_copyback(struct rk_crypto_info *dev)
 static int rk_ablk_rx(struct rk_crypto_info *dev)
 {
 	int err = 0;
+	struct ablkcipher_request *req =
+		ablkcipher_request_cast(dev->async_req);
 
 	dev->unload_data(dev);
 	if (!dev->aligned) {
-		if (!sg_pcopy_from_buffer(dev->ablk_req->dst, dev->nents,
+		if (!sg_pcopy_from_buffer(req->dst, dev->nents,
 					  dev->addr_vir, dev->count,
 					  dev->total - dev->left_bytes -
 					  dev->count)) {
@@ -339,7 +320,8 @@ static int rk_ablk_rx(struct rk_crypto_info *dev)
 	} else {
 		rk_iv_copyback(dev);
 		/* here show the calculation is over without any err */
-		dev->complete(dev, 0);
+		dev->complete(dev->async_req, 0);
+		tasklet_schedule(&dev->queue_task);
 	}
 out_rx:
 	return err;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index ebc46e007804f..821a506b9e17f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -40,14 +40,16 @@ static int zero_message_process(struct ahash_request *req)
 	return 0;
 }
 
-static void rk_ahash_crypto_complete(struct rk_crypto_info *dev, int err)
+static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err)
 {
-	if (dev->ahash_req->base.complete)
-		dev->ahash_req->base.complete(&dev->ahash_req->base, err);
+	if (base->complete)
+		base->complete(base, err);
 }
 
 static void rk_ahash_reg_init(struct rk_crypto_info *dev)
 {
+	struct ahash_request *req = ahash_request_cast(dev->async_req);
+	struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
 	int reg_status = 0;
 
 	reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) |
@@ -67,7 +69,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev)
 	CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, RK_CRYPTO_HRDMA_ERR_INT |
 					    RK_CRYPTO_HRDMA_DONE_INT);
 
-	CRYPTO_WRITE(dev, RK_CRYPTO_HASH_CTRL, dev->mode |
+	CRYPTO_WRITE(dev, RK_CRYPTO_HASH_CTRL, rctx->mode |
 					       RK_CRYPTO_HASH_SWAP_DO);
 
 	CRYPTO_WRITE(dev, RK_CRYPTO_CONF, RK_CRYPTO_BYTESWAP_HRFIFO |
@@ -164,78 +166,13 @@ static int rk_ahash_export(struct ahash_request *req, void *out)
 
 static int rk_ahash_digest(struct ahash_request *req)
 {
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
-	struct crypto_async_request *async_req, *backlog;
-	struct rk_crypto_info *dev = NULL;
-	unsigned long flags;
-	int ret;
+	struct rk_crypto_info *dev = tctx->dev;
 
 	if (!req->nbytes)
 		return zero_message_process(req);
-
-	dev = tctx->dev;
-	dev->total = req->nbytes;
-	dev->left_bytes = req->nbytes;
-	dev->aligned = 0;
-	dev->mode = 0;
-	dev->align_size = 4;
-	dev->sg_dst = NULL;
-	dev->sg_src = req->src;
-	dev->first = req->src;
-	dev->nents = sg_nents(req->src);
-
-	switch (crypto_ahash_digestsize(tfm)) {
-	case SHA1_DIGEST_SIZE:
-		dev->mode = RK_CRYPTO_HASH_SHA1;
-		break;
-	case SHA256_DIGEST_SIZE:
-		dev->mode = RK_CRYPTO_HASH_SHA256;
-		break;
-	case MD5_DIGEST_SIZE:
-		dev->mode = RK_CRYPTO_HASH_MD5;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	rk_ahash_reg_init(dev);
-
-	spin_lock_irqsave(&dev->lock, flags);
-	ret = crypto_enqueue_request(&dev->queue, &req->base);
-	backlog   = crypto_get_backlog(&dev->queue);
-	async_req = crypto_dequeue_request(&dev->queue);
-	spin_unlock_irqrestore(&dev->lock, flags);
-
-	if (!async_req) {
-		dev_err(dev->dev, "async_req is NULL !!\n");
-		return ret;
-	}
-	if (backlog) {
-		backlog->complete(backlog, -EINPROGRESS);
-		backlog = NULL;
-	}
-
-	dev->ahash_req = ahash_request_cast(async_req);
-
-	tasklet_schedule(&dev->queue_task);
-
-	/*
-	 * it will take some time to process date after last dma transmission.
-	 *
-	 * waiting time is relative with the last date len,
-	 * so cannot set a fixed time here.
-	 * 10-50 makes system not call here frequently wasting
-	 * efficiency, and make it response quickly when dma
-	 * complete.
-	 */
-	while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS))
-		usleep_range(10, 50);
-
-	memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0,
-		      crypto_ahash_digestsize(tfm));
-
-	return 0;
+	else
+		return dev->enqueue(dev, &req->base);
 }
 
 static void crypto_ahash_dma_start(struct rk_crypto_info *dev)
@@ -258,12 +195,45 @@ static int rk_ahash_set_data_start(struct rk_crypto_info *dev)
 
 static int rk_ahash_start(struct rk_crypto_info *dev)
 {
+	struct ahash_request *req = ahash_request_cast(dev->async_req);
+	struct crypto_ahash *tfm;
+	struct rk_ahash_rctx *rctx;
+
+	dev->total = req->nbytes;
+	dev->left_bytes = req->nbytes;
+	dev->aligned = 0;
+	dev->align_size = 4;
+	dev->sg_dst = NULL;
+	dev->sg_src = req->src;
+	dev->first = req->src;
+	dev->nents = sg_nents(req->src);
+	rctx = ahash_request_ctx(req);
+	rctx->mode = 0;
+
+	tfm = crypto_ahash_reqtfm(req);
+	switch (crypto_ahash_digestsize(tfm)) {
+	case SHA1_DIGEST_SIZE:
+		rctx->mode = RK_CRYPTO_HASH_SHA1;
+		break;
+	case SHA256_DIGEST_SIZE:
+		rctx->mode = RK_CRYPTO_HASH_SHA256;
+		break;
+	case MD5_DIGEST_SIZE:
+		rctx->mode = RK_CRYPTO_HASH_MD5;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rk_ahash_reg_init(dev);
 	return rk_ahash_set_data_start(dev);
 }
 
 static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
 {
 	int err = 0;
+	struct ahash_request *req = ahash_request_cast(dev->async_req);
+	struct crypto_ahash *tfm;
 
 	dev->unload_data(dev);
 	if (dev->left_bytes) {
@@ -278,7 +248,24 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
 		}
 		err = rk_ahash_set_data_start(dev);
 	} else {
-		dev->complete(dev, 0);
+		/*
+		 * it will take some time to process date after last dma
+		 * transmission.
+		 *
+		 * waiting time is relative with the last date len,
+		 * so cannot set a fixed time here.
+		 * 10us makes system not call here frequently wasting
+		 * efficiency, and make it response quickly when dma
+		 * complete.
+		 */
+		while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS))
+			udelay(10);
+
+		tfm = crypto_ahash_reqtfm(req);
+		memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0,
+			      crypto_ahash_digestsize(tfm));
+		dev->complete(dev->async_req, 0);
+		tasklet_schedule(&dev->queue_task);
 	}
 
 out_rx:

From b7d65fe18129e2f557c31126fe6623e8c2731528 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Tue, 15 Aug 2017 21:33:24 +0200
Subject: [PATCH 114/116] crypto: inside-secure - fix an error handling path in
 safexcel_probe()

'ret' is known to be 0 at this point.
If 'safexcel_request_ring_irq()' fails, it returns an error code.
Return this value instead of 0 which means success.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 1fabd4aee81b7..89ba9e85c0f37 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -839,9 +839,10 @@ static int safexcel_probe(struct platform_device *pdev)
 		snprintf(irq_name, 6, "ring%d", i);
 		irq = safexcel_request_ring_irq(pdev, irq_name, safexcel_irq_ring,
 						ring_irq);
-
-		if (irq < 0)
+		if (irq < 0) {
+			ret = irq;
 			goto err_clk;
+		}
 
 		priv->ring[i].work_data.priv = priv;
 		priv->ring[i].work_data.ring = i;

From 32e67b9af077e905388f1746bf429ea0c39b6917 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Wed, 16 Aug 2017 07:16:06 +0200
Subject: [PATCH 115/116] crypto: cavium/nitrox - Fix an error handling path in
 'nitrox_probe()'

'err' is known to be 0 at this point.
If 'kzalloc()' fails, returns -ENOMEM instead of 0 which means success.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index 9ccefb9b7232e..fee7cb2ce747c 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -513,8 +513,10 @@ static int nitrox_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 
 	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
-	if (!ndev)
+	if (!ndev) {
+		err = -ENOMEM;
 		goto ndev_fail;
+	}
 
 	pci_set_drvdata(pdev, ndev);
 	ndev->pdev = pdev;

From 2d45a7e89833f88b38112292ff227af437f81f2f Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Thu, 10 Aug 2017 16:40:03 +0200
Subject: [PATCH 116/116] crypto: af_alg - get_page upon reassignment to TX SGL

When a page is assigned to a TX SGL, call get_page to increment the
reference counter. It is possible that one page is referenced in
multiple SGLs:

- in the global TX SGL in case a previous af_alg_pull_tsgl only
reassigned parts of a page to a per-request TX SGL

- in the per-request TX SGL as assigned by af_alg_pull_tsgl

Note, multiple requests can be active at the same time whose TX SGLs all
point to different parts of the same page.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index d6936c0e08d97..ffa9f4ccd9b45 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -641,9 +641,9 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 				if (dst_offset >= plen) {
 					/* discard page before offset */
 					dst_offset -= plen;
-					put_page(page);
 				} else {
 					/* reassign page to dst after offset */
+					get_page(page);
 					sg_set_page(dst + j, page,
 						    plen - dst_offset,
 						    sg[i].offset + dst_offset);
@@ -661,9 +661,7 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 			if (sg[i].length)
 				return;
 
-			if (!dst)
-				put_page(page);
-
+			put_page(page);
 			sg_assign_page(sg + i, NULL);
 		}