[PATCH OLK-6.6 0/3] crypto: hisilicon - add backlog support
From: parm64 <parm64@huawei.com> Chenghai Huang (1): crypto: hisilicon/zip - add backlog support for zip Wenkai Lin (1): crypto: hisilicon/sec2 - fix UAF in sec_alg_send_backlog lizhi (1): crypto: hisilicon/hpre: implement full backlog support for hpre driver drivers/crypto/hisilicon/hpre/hpre_crypto.c | 287 ++++++++++++------ drivers/crypto/hisilicon/sec2/sec_crypto.c | 23 +- drivers/crypto/hisilicon/zip/zip_crypto.c | 308 +++++++++++++------- 3 files changed, 409 insertions(+), 209 deletions(-) -- 2.43.0
From: Chenghai Huang <huangchenghai2@huawei.com> driver inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/9217 CVE: NA ---------------------------------------------------------------------- Implement a backlog mechanism to queue requests when the hardware is busy. This prevents request failures during hardware congestion and provides a fallback function in the event of a hardware reset. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: JiangShui Yang <yangjiangshui@h-partners.com> --- drivers/crypto/hisilicon/zip/zip_crypto.c | 308 ++++++++++++++-------- 1 file changed, 197 insertions(+), 111 deletions(-) diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 108ad882393d..5491349518e5 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -29,6 +29,7 @@ #define HZIP_ALG_DEFLATE GENMASK(5, 4) #define HZIP_ALG_LZ4 BIT(8) +#define HZIP_INVAL_REQ_ID ((u16)0xFFFF) static DEFINE_MUTEX(zip_algs_lock); static unsigned int zip_available_devs; @@ -56,11 +57,11 @@ struct hisi_zip_req { dma_addr_t dma_src; dma_addr_t dma_dst; struct hisi_zip_qp_ctx *qp_ctx; + struct list_head list; u16 req_id; }; struct hisi_zip_req_q { - struct hisi_zip_req *q; unsigned long *req_bitmap; spinlock_t req_lock; u16 size; @@ -126,6 +127,9 @@ static int hisi_zip_fallback_do_work(struct crypto_comp *tfm, struct acomp_req * const char *algo; int ret; + if (!tfm) + return -EINVAL; + input = kmap_local_page(sg_page(acomp_req->src)) + acomp_req->src->offset; output = kmap_local_page(sg_page(acomp_req->dst)) + acomp_req->dst->offset; @@ -146,12 +150,10 @@ static int hisi_zip_fallback_do_work(struct crypto_comp *tfm, struct acomp_req * return ret; } -static struct hisi_zip_req *hisi_zip_create_req(struct hisi_zip_qp_ctx *qp_ctx, - struct acomp_req *req) +static int hisi_zip_create_req(struct hisi_zip_req *req) { + struct hisi_zip_qp_ctx *qp_ctx = 
req->qp_ctx; struct hisi_zip_req_q *req_q = &qp_ctx->req_q; - struct hisi_zip_req *q = req_q->q; - struct hisi_zip_req *req_cache; int req_id; spin_lock(&req_q->req_lock); @@ -160,28 +162,26 @@ static struct hisi_zip_req *hisi_zip_create_req(struct hisi_zip_qp_ctx *qp_ctx, if (req_id >= req_q->size) { spin_unlock(&req_q->req_lock); dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n"); - return ERR_PTR(-EAGAIN); + return -EBUSY; } set_bit(req_id, req_q->req_bitmap); spin_unlock(&req_q->req_lock); - req_cache = q + req_id; - req_cache->req_id = req_id; - req_cache->req = req; - req_cache->qp_ctx = qp_ctx; + req->req_id = req_id; - return req_cache; + return 0; } -static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx, - struct hisi_zip_req *req) +static void hisi_zip_remove_req(struct hisi_zip_req *req) { - struct hisi_zip_req_q *req_q = &qp_ctx->req_q; + struct hisi_zip_req_q *req_q = &req->qp_ctx->req_q; spin_lock(&req_q->req_lock); clear_bit(req->req_id, req_q->req_bitmap); spin_unlock(&req_q->req_lock); + + req->req_id = HZIP_INVAL_REQ_ID; } static void hisi_zip_fill_addr(struct hisi_zip_sqe *sqe, struct hisi_zip_req *req) @@ -258,19 +258,21 @@ static void hisi_zip_fill_sqe(struct hisi_zip_ctx *ctx, struct hisi_zip_sqe *sqe ops->fill_sqe_type(sqe, ops->sqe_type); } -static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, - struct hisi_zip_req *req) +static void hisi_zip_enqueue_backlog(struct hisi_zip_req *req) { + struct hisi_qp *qp = req->qp_ctx->qp; + + spin_lock_bh(&qp->backlog.lock); + list_add_tail(&req->list, &qp->backlog.list); + spin_unlock_bh(&qp->backlog.lock); +} + +static int hisi_zip_map_req_buffers(struct hisi_zip_req *req) +{ + struct hisi_zip_qp_ctx *qp_ctx = req->qp_ctx; struct hisi_acc_sgl_pool *pool = qp_ctx->sgl_pool; - struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx; + struct device *dev = &qp_ctx->qp->qm->pdev->dev; struct acomp_req *a_req = req->req; - struct hisi_qp *qp = qp_ctx->qp; - struct device *dev = 
&qp->qm->pdev->dev; - struct hisi_zip_sqe zip_sqe; - int ret; - - if (unlikely(!a_req->src || !a_req->slen || !a_req->dst || !a_req->dlen)) - return -EINVAL; req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool, req->req_id << 1, &req->dma_src, @@ -285,33 +287,114 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, (req->req_id << 1) + 1, &req->dma_dst, DMA_FROM_DEVICE); if (IS_ERR(req->hw_dst)) { - ret = PTR_ERR(req->hw_dst); - dev_err(dev, "failed to map the dst buffer to hw sgl (%d)!\n", - ret); - goto err_unmap_input; + dev_err(dev, "failed to map the dst buffer to hw sgl (%ld)!\n", + PTR_ERR(req->hw_dst)); + hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src, DMA_TO_DEVICE); + return PTR_ERR(req->hw_dst); } + return 0; +} + +static void hisi_zip_unmap_req_buffers(struct hisi_zip_req *req) +{ + struct device *dev = &req->qp_ctx->qp->qm->pdev->dev; + struct acomp_req *a_req = req->req; + + hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst, DMA_FROM_DEVICE); + hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src, DMA_TO_DEVICE); +} + +static int hisi_zip_do_work(struct hisi_zip_req *req) +{ + struct hisi_zip_qp_ctx *qp_ctx = req->qp_ctx; + struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx; + struct hisi_qp *qp = qp_ctx->qp; + struct hisi_zip_sqe zip_sqe; + int ret; + hisi_zip_fill_sqe(qp_ctx->ctx, &zip_sqe, qp_ctx->req_type, req); /* send command to start a task */ - atomic64_inc(&dfx->send_cnt); ret = hisi_qp_send(qp, &zip_sqe); - if (unlikely(ret < 0)) { - atomic64_inc(&dfx->send_busy_cnt); - ret = -EAGAIN; - dev_dbg_ratelimited(dev, "failed to send request!\n"); - goto err_unmap_output; + if (likely(!ret)) { + atomic64_inc(&dfx->send_cnt); + return -EINPROGRESS; } - return -EINPROGRESS; + if (ret == -EBUSY) + atomic64_inc(&dfx->send_busy_cnt); -err_unmap_output: - hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst, DMA_FROM_DEVICE); -err_unmap_input: - hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src, DMA_TO_DEVICE); return ret; } 
+static void hisi_zip_send_backlog_soft(struct hisi_zip_qp_ctx *qp_ctx) +{ + struct hisi_zip_ctx *ctx = qp_ctx->ctx; + bool is_decomp = qp_ctx->qp->alg_type; + struct hisi_zip_req *req, *tmp; + int ret; + + list_for_each_entry_safe(req, tmp, &qp_ctx->qp->backlog.list, list) { + list_del(&req->list); + + if (req->req_id != HZIP_INVAL_REQ_ID) { + hisi_zip_unmap_req_buffers(req); + hisi_zip_remove_req(req); + } + + ret = hisi_zip_fallback_do_work(ctx->soft_tfm, req->req, + is_decomp); + + /* Wake up the busy thread first, then return the errno. */ + if (req->req->base.complete) { + acomp_request_complete(req->req, -EINPROGRESS); + acomp_request_complete(req->req, ret); + } + } +} + +static void hisi_zip_send_backlog(struct hisi_qp *qp) +{ + struct hisi_zip_req *req, *tmp; + struct hisi_zip_req_q *req_q; + int ret; + + spin_lock_bh(&qp->backlog.lock); + list_for_each_entry_safe(req, tmp, &qp->backlog.list, list) { + req_q = &req->qp_ctx->req_q; + if (req->req_id == HZIP_INVAL_REQ_ID) { + ret = hisi_zip_create_req(req); + if (ret) + continue; + + ret = hisi_zip_map_req_buffers(req); + if (unlikely(ret)) { + hisi_zip_remove_req(req); + hisi_zip_send_backlog_soft(req->qp_ctx); + goto unlock; + } + } + + ret = hisi_zip_do_work(req); + switch (ret) { + case -EINPROGRESS: + list_del(&req->list); + if (req->req->base.complete) + acomp_request_complete(req->req, -EINPROGRESS); + break; + case -EBUSY: + goto unlock; + default: + hisi_zip_send_backlog_soft(req->qp_ctx); + goto unlock; + } + } + +unlock: + spin_unlock_bh(&qp->backlog.lock); +} + static u32 hisi_zip_get_status(struct hisi_zip_sqe *sqe) { return sqe->dw3 & HZIP_BD_STATUS_M; @@ -344,75 +427,94 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data) err = -EIO; } - hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst, DMA_FROM_DEVICE); - hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src, DMA_TO_DEVICE); + hisi_zip_unmap_req_buffers(req); acomp_req->dlen = ops->get_dstlen(sqe); + 
hisi_zip_remove_req(req); if (acomp_req->base.complete) acomp_request_complete(acomp_req, err); - hisi_zip_remove_req(qp_ctx, req); + hisi_zip_send_backlog(qp); } -static int hisi_zip_acompress(struct acomp_req *acomp_req) +static int hisi_zip_do_comp(struct hisi_zip_req *req) { + struct acomp_req *acomp_req = req->req; struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm); - struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[HZIP_QPC_COMP]; - struct hisi_zip_req *req; - struct device *dev; + struct hisi_zip_qp_ctx *qp_ctx = req->qp_ctx; int ret; - if (ctx->fallback) - return hisi_zip_fallback_do_work(ctx->soft_tfm, acomp_req, 0); - - dev = &qp_ctx->qp->qm->pdev->dev; + if (unlikely(!acomp_req->src || !acomp_req->slen || + !acomp_req->dst || !acomp_req->dlen)) + return -EINVAL; - req = hisi_zip_create_req(qp_ctx, acomp_req); - if (IS_ERR(req)) - return PTR_ERR(req); + if (ctx->fallback) + return hisi_zip_fallback_do_work(ctx->soft_tfm, acomp_req, + qp_ctx->req_type); + + ret = hisi_zip_create_req(req); + if (ret && (acomp_req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + /* all req bitmaps are used add to backlog list */ + req->req_id = HZIP_INVAL_REQ_ID; + hisi_zip_enqueue_backlog(req); + return -EBUSY; + } else if (unlikely(ret)) { + goto fallback_req; + } - ret = hisi_zip_do_work(qp_ctx, req); - if (unlikely(ret != -EINPROGRESS)) { - dev_info_ratelimited(dev, "failed to do compress (%d)!\n", ret); - hisi_zip_remove_req(qp_ctx, req); + ret = hisi_zip_map_req_buffers(req); + if (unlikely(ret)) + goto remove_req; + + ret = hisi_zip_do_work(req); + if (ret == -EBUSY && (acomp_req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + /* hardwre busy add to backlog list */ + hisi_zip_enqueue_backlog(req); + } else if (unlikely(ret != -EINPROGRESS)) { + dev_info_ratelimited(&qp_ctx->qp->qm->pdev->dev, + "failed to do %scompress (%d)!\n", + qp_ctx->req_type ? 
"de" : "", ret); + goto unmap_req; } return ret; + +unmap_req: + hisi_zip_unmap_req_buffers(req); +remove_req: + hisi_zip_remove_req(req); +fallback_req: + return hisi_zip_fallback_do_work(ctx->soft_tfm, acomp_req, + req->qp_ctx->req_type); } -static int hisi_zip_adecompress(struct acomp_req *acomp_req) +static int hisi_zip_acompress(struct acomp_req *acomp_req) { struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm); - struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[HZIP_QPC_DECOMP]; - struct hisi_zip_req *req; - struct device *dev; - int ret; - - if (ctx->fallback) - return hisi_zip_fallback_do_work(ctx->soft_tfm, acomp_req, 1); - - dev = &qp_ctx->qp->qm->pdev->dev; + struct hisi_zip_req *req = acomp_request_ctx(acomp_req); - req = hisi_zip_create_req(qp_ctx, acomp_req); - if (IS_ERR(req)) - return PTR_ERR(req); + req->req = acomp_req; + req->qp_ctx = &ctx->qp_ctx[HZIP_QPC_COMP]; + return hisi_zip_do_comp(req); +} - ret = hisi_zip_do_work(qp_ctx, req); - if (unlikely(ret != -EINPROGRESS)) { - dev_info_ratelimited(dev, "failed to do decompress (%d)!\n", - ret); - hisi_zip_remove_req(qp_ctx, req); - } +static int hisi_zip_adecompress(struct acomp_req *acomp_req) +{ + struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm); + struct hisi_zip_req *req = acomp_request_ctx(acomp_req); - return ret; + req->req = acomp_req; + req->qp_ctx = &ctx->qp_ctx[HZIP_QPC_DECOMP]; + return hisi_zip_do_comp(req); } static int hisi_zip_decompress(struct acomp_req *acomp_req) { struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm); - return hisi_zip_fallback_do_work(ctx->soft_tfm, acomp_req, 1); + return hisi_zip_fallback_do_work(ctx->soft_tfm, acomp_req, + HZIP_ALG_TYPE_DECOMP); } static const struct hisi_zip_sqe_ops hisi_zip_ops = { @@ -476,7 +578,7 @@ static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx) { u16 q_depth = ctx->qp_ctx[0].qp->sq_depth; struct hisi_zip_req_q *req_q; - int i, ret; + int i; for (i = 0; i < HZIP_CTX_Q_NUM; i++) { req_q = 
&ctx->qp_ctx[i].req_q; @@ -484,44 +586,21 @@ static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx) req_q->req_bitmap = bitmap_zalloc(req_q->size, GFP_KERNEL); if (!req_q->req_bitmap) { - ret = -ENOMEM; - if (i == 0) - return ret; - - goto err_free_comp_q; + bitmap_free(ctx->qp_ctx[HZIP_QPC_COMP].req_q.req_bitmap); + return -ENOMEM; } spin_lock_init(&req_q->req_lock); - - req_q->q = kcalloc(req_q->size, sizeof(struct hisi_zip_req), - GFP_KERNEL); - if (!req_q->q) { - ret = -ENOMEM; - if (i == 0) - goto err_free_comp_bitmap; - else - goto err_free_decomp_bitmap; - } } return 0; - -err_free_decomp_bitmap: - bitmap_free(ctx->qp_ctx[HZIP_QPC_DECOMP].req_q.req_bitmap); -err_free_comp_q: - kfree(ctx->qp_ctx[HZIP_QPC_COMP].req_q.q); -err_free_comp_bitmap: - bitmap_free(ctx->qp_ctx[HZIP_QPC_COMP].req_q.req_bitmap); - return ret; } static void hisi_zip_release_req_q(struct hisi_zip_ctx *ctx) { int i; - for (i = 0; i < HZIP_CTX_Q_NUM; i++) { - kfree(ctx->qp_ctx[i].req_q.q); + for (i = 0; i < HZIP_CTX_Q_NUM; i++) bitmap_free(ctx->qp_ctx[i].req_q.req_bitmap); - } } static int hisi_zip_create_sgl_pool(struct hisi_zip_ctx *ctx) @@ -571,7 +650,7 @@ static void hisi_zip_set_acomp_cb(struct hisi_zip_ctx *ctx, static int hisi_zip_fallback_init(struct hisi_zip_ctx *ctx, const char *alg_name) { - if (!IS_ERR_OR_NULL(ctx->soft_tfm)) + if (ctx->soft_tfm) return 0; if (!crypto_has_comp(alg_name, 0, 0)) @@ -580,7 +659,8 @@ static int hisi_zip_fallback_init(struct hisi_zip_ctx *ctx, const char *alg_name ctx->soft_tfm = crypto_alloc_comp(alg_name, 0, 0); if (IS_ERR_OR_NULL(ctx->soft_tfm)) { pr_err("could not alloc soft tfm %s\n", alg_name); - return PTR_ERR(ctx->soft_tfm); + ctx->soft_tfm = NULL; + return -ENOMEM; } return 0; @@ -588,7 +668,7 @@ static int hisi_zip_fallback_init(struct hisi_zip_ctx *ctx, const char *alg_name static void hisi_zip_fallback_uninit(struct hisi_zip_ctx *ctx) { - if (IS_ERR_OR_NULL(ctx->soft_tfm)) + if (!ctx->soft_tfm) return; crypto_free_comp(ctx->soft_tfm); 
@@ -599,8 +679,10 @@ static int hisi_zip_acomp_init(struct crypto_acomp *tfm) { const char *alg_name = crypto_tfm_alg_name(&tfm->base); struct hisi_zip_ctx *ctx = crypto_tfm_ctx(&tfm->base); + int ret, fallback_err; struct device *dev; - int ret; + + fallback_err = hisi_zip_fallback_init(ctx, alg_name); ret = hisi_zip_ctx_init(ctx, COMP_NAME_TO_TYPE(alg_name), tfm->base.node); if (ret) { @@ -631,8 +713,10 @@ static int hisi_zip_acomp_init(struct crypto_acomp *tfm) err_ctx_exit: hisi_zip_ctx_exit(ctx); switch_to_soft: - ctx->fallback = true; - return hisi_zip_fallback_init(ctx, alg_name); + if (!fallback_err) + ctx->fallback = true; + + return fallback_err; } static int hisi_zip_acomp_init_fb(struct crypto_acomp *tfm) @@ -672,6 +756,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = { .exit = hisi_zip_acomp_exit, .compress = hisi_zip_acompress, .decompress = hisi_zip_adecompress, + .reqsize = sizeof(struct hisi_zip_req), .base = { .cra_name = "deflate", .cra_driver_name = "hisi-deflate-acomp", @@ -710,6 +795,7 @@ static struct acomp_alg hisi_zip_acomp_lz4 = { .exit = hisi_zip_acomp_exit, .compress = hisi_zip_acompress, .decompress = hisi_zip_decompress, + .reqsize = sizeof(struct hisi_zip_req), .base = { .cra_name = "lz4", .cra_driver_name = "hisi-lz4-acomp", -- 2.43.0
From: Wenkai Lin <linwenkai6@hisilicon.com> driver inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/9217 CVE: NA ---------------------------------------------------------------------- After crypto_request_complete() is invoked, the crypto core may immediately free the request structure and its associated tfm context. Consequently, the sec_ctx and qp_ctx are also released. However, sec_alg_send_backlog() can still attempt to access these structures when processing queued requests, resulting in a use-after-free (UAF) bug. Fix this by accessing the backlog list through the long-term qp memory and using the ctx memory only when the backlog list is not empty. Fixes: f0ae287c5045 ("crypto: hisilicon/sec2 - implement full backlog mode for sec") Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: JiangShui Yang <yangjiangshui@h-partners.com> --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a44055d33720..6750738e8536 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -234,13 +234,15 @@ static int qp_send_message(struct sec_req *req) return -EINPROGRESS; } -static void sec_alg_send_backlog_soft(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) +static void sec_alg_send_backlog_soft(struct hisi_qp *qp) { struct sec_req *req, *tmp; + struct sec_ctx *ctx; int ret; - list_for_each_entry_safe(req, tmp, &qp_ctx->qp->backlog.list, list) { + list_for_each_entry_safe(req, tmp, &qp->backlog.list, list) { list_del(&req->list); + ctx = req->qp_ctx->ctx; ctx->req_op->buf_unmap(ctx, req); if (req->req_id >= 0) sec_free_req_id(req); @@ -258,9 +260,8 @@ static void sec_alg_send_backlog_soft(struct sec_ctx *ctx, struct sec_qp_ctx *qp } } -static void 
sec_alg_send_backlog(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) +static void sec_alg_send_backlog(struct hisi_qp *qp) { - struct hisi_qp *qp = qp_ctx->qp; struct sec_req *req, *tmp; int ret; @@ -277,7 +278,7 @@ static void sec_alg_send_backlog(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) goto unlock; default: /* Release memory resources and send all requests through software. */ - sec_alg_send_backlog_soft(ctx, qp_ctx); + sec_alg_send_backlog_soft(qp); goto unlock; } } @@ -306,6 +307,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) ctx->req_op->buf_unmap(ctx, req); ctx->req_op->callback(ctx, req, err); + sec_alg_send_backlog(qp); } static void sec_req_cb3(struct hisi_qp *qp, void *resp) @@ -331,6 +333,7 @@ static void sec_req_cb3(struct hisi_qp *qp, void *resp) ctx->req_op->buf_unmap(ctx, req); ctx->req_op->callback(ctx, req, err); + sec_alg_send_backlog(qp); } static int sec_alg_send_message_retry(struct sec_req *req) @@ -1674,8 +1677,6 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type) static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, int err) { - struct sec_qp_ctx *qp_ctx = req->qp_ctx; - if (req->req_id >= 0) sec_free_req_id(req); @@ -1685,7 +1686,6 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, sec_update_iv(req, SEC_SKCIPHER); crypto_request_complete(req->base, err); - sec_alg_send_backlog(ctx, qp_ctx); } static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req) @@ -1924,7 +1924,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) struct aead_request *a_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); size_t authsize = crypto_aead_authsize(tfm); - struct sec_qp_ctx *qp_ctx = req->qp_ctx; + int error = err; size_t sz; if (!err && req->c_req.encrypt) { @@ -1935,15 +1935,14 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) authsize, a_req->cryptlen + 
a_req->assoclen); if (unlikely(sz != authsize)) { dev_err(c->dev, "copy out mac err!\n"); - err = -EINVAL; + error = -EINVAL; } } if (req->req_id >= 0) sec_free_req_id(req); - crypto_request_complete(req->base, err); - sec_alg_send_backlog(c, qp_ctx); + crypto_request_complete(req->base, error); } static void sec_request_uninit(struct sec_req *req) -- 2.43.0
From: lizhi <lizhi206@huawei.com> driver inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/9217 CVE: NA ---------------------------------------------------------------------- When the hardware queue returns -EBUSY, requests are queued instead of being failed immediately. The driver retries queued requests from the completion path after earlier requests have finished. This reduces request failures caused by temporary hardware congestion and improves throughput and stability under high load. Signed-off-by: lizhi <lizhi206@huawei.com> Signed-off-by: JiangShui Yang <yangjiangshui@h-partners.com> --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 287 ++++++++++++++------ 1 file changed, 201 insertions(+), 86 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index c83a6868ffc8..379544cc087b 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -31,6 +31,7 @@ struct hpre_ctx; #define HPRE_DH_G_FLAG 0x02 #define HPRE_TRY_SEND_TIMES 100 #define HPRE_INVLD_REQ_ID (-1) +#define HPRE_ALG_TYPE_MASK 0x1F #define HPRE_SQE_ALG_BITS 5 #define HPRE_SQE_DONE_SHIFT 30 @@ -40,6 +41,7 @@ struct hpre_ctx; #define HPRE_DFX_US_TO_NS 1000 #define HPRE_ENABLE_HPCORE_SHIFT 7 +#define HPRE_ECDH_CLR_DATA_SHIFT 2 /* due to nist p521 */ #define HPRE_ECC_MAX_KSZ 66 @@ -152,6 +154,8 @@ struct hpre_asym_request { int err; hpre_cb cb; struct timespec64 req_time; + struct crypto_async_request *base; + struct list_head list; }; static inline unsigned int hpre_align_sz(void) @@ -255,8 +259,8 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx, struct scatterlist *dst, struct scatterlist *src) { - struct device *dev = ctx->dev; struct hpre_sqe *sqe = &req->req; + struct device *dev = ctx->dev; dma_addr_t tmp; tmp = le64_to_cpu(sqe->in); @@ -284,6 +288,61 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx, } } +static void 
hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx, + struct hpre_asym_request *req, + struct scatterlist *dst, + struct scatterlist *src) +{ + struct hpre_sqe *sqe = &req->req; + struct device *dev = ctx->dev; + dma_addr_t dma; + + dma = le64_to_cpu(sqe->in); + if (unlikely(dma_mapping_error(dev, dma))) + return; + + /* req->src may contain garbage value, check both src and req->src before freeing */ + if (src && req->src) + dma_free_coherent(dev, ctx->key_sz << HPRE_ECDH_CLR_DATA_SHIFT, + req->src, dma); + + dma = le64_to_cpu(sqe->out); + if (unlikely(dma_mapping_error(dev, dma))) + return; + + if (req->dst) + dma_free_coherent(dev, ctx->key_sz << 1, req->dst, dma); + if (dst) + dma_unmap_single(dev, dma, ctx->key_sz << 1, DMA_FROM_DEVICE); +} + +static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx, + struct hpre_asym_request *req, + struct scatterlist *dst, + struct scatterlist *src) +{ + struct hpre_sqe *sqe = &req->req; + struct device *dev = ctx->dev; + dma_addr_t dma; + + dma = le64_to_cpu(sqe->in); + if (unlikely(dma_mapping_error(dev, dma))) + return; + + /* req->src may contain garbage value, check both src and req->src before freeing */ + if (src && req->src) + dma_free_coherent(dev, ctx->key_sz, req->src, dma); + + dma = le64_to_cpu(sqe->out); + if (unlikely(dma_mapping_error(dev, dma))) + return; + + if (req->dst) + dma_free_coherent(dev, ctx->key_sz, req->dst, dma); + if (dst) + dma_unmap_single(dev, dma, ctx->key_sz, DMA_FROM_DEVICE); +} + static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe, void **kreq) { @@ -337,6 +396,98 @@ static bool hpre_is_bd_timeout(struct hpre_asym_request *req, return true; } +static int hpre_send(struct hpre_ctx *ctx, struct hpre_sqe *msg) +{ + struct hpre_dfx *dfx = ctx->hpre->debug.dfx; + int cnt = 0; + int ret; + + do { + ret = hisi_qp_send(ctx->qp, msg); + if (ret != -EBUSY) + break; + atomic64_inc(&dfx[HPRE_SEND_BUSY_CNT].value); + } while (cnt++ < HPRE_TRY_SEND_TIMES); + + if 
(likely(!ret)) { + atomic64_inc(&dfx[HPRE_SEND_CNT].value); + return ret; + } + + if (ret != -EBUSY) + atomic64_inc(&dfx[HPRE_SEND_FAIL_CNT].value); + + return ret; +} + +static int hpre_send_backlog(struct hpre_ctx *ctx, struct hpre_sqe *msg) +{ + struct hpre_dfx *dfx = ctx->hpre->debug.dfx; + int ret; + + ret = hisi_qp_send(ctx->qp, msg); + if (likely(!ret)) + atomic64_inc(&dfx[HPRE_SEND_CNT].value); + else if (unlikely(ret != -EBUSY)) + atomic64_inc(&dfx[HPRE_SEND_FAIL_CNT].value); + else + atomic64_inc(&dfx[HPRE_SEND_BUSY_CNT].value); + + return ret; +} + +static void hpre_alg_hw_data_clr_all(struct hpre_ctx *ctx, struct hpre_asym_request *h_req) +{ + switch (le32_to_cpu(h_req->req.dw0) & HPRE_ALG_TYPE_MASK) { + case HPRE_ALG_DH_G2: + case HPRE_ALG_DH: + hpre_hw_data_clr_all(ctx, h_req, h_req->areq.dh->dst, h_req->areq.dh->src); + break; + case HPRE_ALG_NC_NCRT: + case HPRE_ALG_NC_CRT: + hpre_hw_data_clr_all(ctx, h_req, h_req->areq.rsa->dst, h_req->areq.rsa->src); + break; + case HPRE_ALG_ECC_MUL: + hpre_ecdh_hw_data_clr_all(ctx, h_req, h_req->areq.ecdh->dst, h_req->areq.ecdh->src); + break; + case HPRE_ALG_CURVE25519_MUL: + hpre_curve25519_hw_data_clr_all(ctx, h_req, h_req->areq.curve25519->dst, + h_req->areq.curve25519->src); + break; + default: + break; + } +} + +static void hpre_alg_send_backlog(struct hisi_qp *qp) +{ + struct hpre_asym_request *req, *tmp; + int ret; + + spin_lock_bh(&qp->backlog.lock); + list_for_each_entry_safe(req, tmp, &qp->backlog.list, list) { + ret = hpre_send_backlog(req->ctx, &req->req); + switch (ret) { + case 0: + list_del(&req->list); + crypto_request_complete(req->base, -EINPROGRESS); + break; + case -EBUSY: + /* Device is busy and stop send any request. */ + goto unlock; + default: + /* Current no fallback for any send error. 
*/ + list_del(&req->list); + hpre_alg_hw_data_clr_all(req->ctx, req); + crypto_request_complete(req->base, -EIO); + break; + } + } + +unlock: + spin_unlock_bh(&qp->backlog.lock); +} + static void hpre_dh_cb(struct hpre_ctx *ctx, void *resp) { struct hpre_dfx *dfx = ctx->hpre->debug.dfx; @@ -391,6 +542,7 @@ static void hpre_alg_cb(struct hisi_qp *qp, void *resp) } h_req->cb(h_req->ctx, resp); + hpre_alg_send_backlog(qp); } static int hpre_ctx_init(struct hpre_ctx *ctx, u8 type) @@ -464,25 +616,39 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa) return 0; } -static int hpre_send(struct hpre_ctx *ctx, struct hpre_sqe *msg) +static int hpre_alg_try_enqueue(struct hpre_asym_request *hpre_req) { - struct hpre_dfx *dfx = ctx->hpre->debug.dfx; - int ctr = 0; + struct hisi_qp *qp = hpre_req->ctx->qp; + + /* Check if any request is already backlogged */ + if (!list_empty(&qp->backlog.list)) + return -EBUSY; + + /* Try to enqueue to HW ring */ + return hpre_send_backlog(hpre_req->ctx, &hpre_req->req); +} + +static int hpre_alg_send_message(struct hpre_asym_request *hpre_req) +{ + struct hisi_qp *qp = hpre_req->ctx->qp; int ret; - do { - atomic64_inc(&dfx[HPRE_SEND_CNT].value); - ret = hisi_qp_send(ctx->qp, msg); - if (ret != -EBUSY) - break; - atomic64_inc(&dfx[HPRE_SEND_BUSY_CNT].value); - } while (ctr++ < HPRE_TRY_SEND_TIMES); + if (!(hpre_req->base->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + ret = hpre_send(hpre_req->ctx, &hpre_req->req); + if (ret == -EBUSY) + return -ENOSPC; + } else { + ret = hpre_alg_try_enqueue(hpre_req); + if (ret == -EBUSY) { + spin_lock_bh(&qp->backlog.lock); + list_add_tail(&hpre_req->list, &qp->backlog.list); + spin_unlock_bh(&qp->backlog.lock); + return -EBUSY; + } + } if (likely(!ret)) - return ret; - - if (ret != -EBUSY) - atomic64_inc(&dfx[HPRE_SEND_FAIL_CNT].value); + return -EINPROGRESS; return ret; } @@ -496,6 +662,7 @@ static int hpre_dh_compute_value(struct kpp_request *req) struct hpre_sqe *msg = 
&hpre_req->req; int ret; + hpre_req->base = &req->base; ret = hpre_msg_request_set(ctx, req, false); if (unlikely(ret)) return ret; @@ -517,14 +684,12 @@ static int hpre_dh_compute_value(struct kpp_request *req) else msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_DH); - /* success */ - ret = hpre_send(ctx, msg); - if (likely(!ret)) - return -EINPROGRESS; + ret = hpre_alg_send_message(hpre_req); + if (likely(ret == -EINPROGRESS || ret == -EBUSY)) + return ret; clear_all: hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src); - return ret; } @@ -769,6 +934,7 @@ static int hpre_rsa_enc(struct akcipher_request *req) struct hpre_sqe *msg = &hpre_req->req; int ret; + hpre_req->base = &req->base; /* For unsupported key size and unavailable devices, use soft tfm instead */ if (ctx->fallback) { akcipher_request_set_tfm(req, ctx->rsa.soft_tfm); @@ -795,10 +961,9 @@ static int hpre_rsa_enc(struct akcipher_request *req) if (unlikely(ret)) goto clear_all; - /* success */ - ret = hpre_send(ctx, msg); - if (likely(!ret)) - return -EINPROGRESS; + ret = hpre_alg_send_message(hpre_req); + if (likely(ret == -EINPROGRESS || ret == -EBUSY)) + return ret; clear_all: hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src); @@ -815,6 +980,7 @@ static int hpre_rsa_dec(struct akcipher_request *req) struct hpre_sqe *msg = &hpre_req->req; int ret; + hpre_req->base = &req->base; /* For unsupported key size and unavailable devices, use soft tfm instead */ if (ctx->fallback) { akcipher_request_set_tfm(req, ctx->rsa.soft_tfm); @@ -848,10 +1014,9 @@ static int hpre_rsa_dec(struct akcipher_request *req) if (unlikely(ret)) goto clear_all; - /* success */ - ret = hpre_send(ctx, msg); - if (likely(!ret)) - return -EINPROGRESS; + ret = hpre_alg_send_message(hpre_req); + if (likely(ret == -EINPROGRESS || ret == -EBUSY)) + return ret; clear_all: hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src); @@ -1416,32 +1581,6 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void 
*buf, return 0; } -static void hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx, - struct hpre_asym_request *req, - struct scatterlist *dst, - struct scatterlist *src) -{ - struct device *dev = ctx->dev; - struct hpre_sqe *sqe = &req->req; - dma_addr_t dma; - - dma = le64_to_cpu(sqe->in); - if (unlikely(dma_mapping_error(dev, dma))) - return; - - if (src && req->src) - dma_free_coherent(dev, ctx->key_sz << 2, req->src, dma); - - dma = le64_to_cpu(sqe->out); - if (unlikely(dma_mapping_error(dev, dma))) - return; - - if (req->dst) - dma_free_coherent(dev, ctx->key_sz << 1, req->dst, dma); - if (dst) - dma_unmap_single(dev, dma, ctx->key_sz << 1, DMA_FROM_DEVICE); -} - static void hpre_ecdh_cb(struct hpre_ctx *ctx, void *resp) { unsigned int curve_sz = hpre_ecdh_get_curvesz(ctx->curve_id); @@ -1567,6 +1706,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req) struct hpre_sqe *msg = &hpre_req->req; int ret; + hpre_req->base = &req->base; ret = hpre_ecdh_msg_request_set(ctx, req); if (unlikely(ret)) { dev_err(dev, "failed to set ecdh request, ret = %d!\n", ret); @@ -1592,9 +1732,9 @@ static int hpre_ecdh_compute_value(struct kpp_request *req) msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_ECC_MUL); msg->resv1 = ctx->enable_hpcore << HPRE_ENABLE_HPCORE_SHIFT; - ret = hpre_send(ctx, msg); - if (likely(!ret)) - return -EINPROGRESS; + ret = hpre_alg_send_message(hpre_req); + if (likely(ret == -EINPROGRESS || ret == -EBUSY)) + return ret; clear_all: hpre_ecdh_hw_data_clr_all(ctx, hpre_req, req->dst, req->src); @@ -1795,32 +1935,6 @@ static int hpre_curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, return 0; } -static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx, - struct hpre_asym_request *req, - struct scatterlist *dst, - struct scatterlist *src) -{ - struct device *dev = ctx->dev; - struct hpre_sqe *sqe = &req->req; - dma_addr_t dma; - - dma = le64_to_cpu(sqe->in); - if (unlikely(dma_mapping_error(dev, dma))) - return; - - if 
(src && req->src) - dma_free_coherent(dev, ctx->key_sz, req->src, dma); - - dma = le64_to_cpu(sqe->out); - if (unlikely(dma_mapping_error(dev, dma))) - return; - - if (req->dst) - dma_free_coherent(dev, ctx->key_sz, req->dst, dma); - if (dst) - dma_unmap_single(dev, dma, ctx->key_sz, DMA_FROM_DEVICE); -} - static void hpre_curve25519_cb(struct hpre_ctx *ctx, void *resp) { struct hpre_dfx *dfx = ctx->hpre->debug.dfx; @@ -1982,6 +2096,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req) struct hpre_sqe *msg = &hpre_req->req; int ret; + hpre_req->base = &req->base; ret = hpre_curve25519_msg_request_set(ctx, req); if (unlikely(ret)) { dev_err(dev, "failed to set curve25519 request, ret = %d!\n", ret); @@ -2006,9 +2121,9 @@ static int hpre_curve25519_compute_value(struct kpp_request *req) } msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_CURVE25519_MUL); - ret = hpre_send(ctx, msg); - if (likely(!ret)) - return -EINPROGRESS; + ret = hpre_alg_send_message(hpre_req); + if (likely(ret == -EINPROGRESS || ret == -EBUSY)) + return ret; clear_all: hpre_curve25519_hw_data_clr_all(ctx, hpre_req, req->dst, req->src); -- 2.43.0
participants (1)
-
ZongYu Wu