
From: Chenghai Huang <huangchenghai2@huawei.com> Replace the user output buffer with the internal buffer when the size of the user-provided buffer is less than 200 bytes. It need to copy data in either of the following scenarios: 1.There is data in the internal buffer before hardware processing. 2.After the hardware uses the internal buffer to process. The following two situations may occur during each copy process: a.Data is completely copied and output. b.Part of the data is copied and output. 1 and 2 are combined with a and b to implement the code logic of check_store_buf and copy_from_hw. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- drv/hisi_comp.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 217 insertions(+), 6 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 1e55094b..bfeb8fad 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -81,6 +81,13 @@ #define BUF_TYPE 2 +/* 200 * 1.125 + GZIP_HEADER_SZ, align with 4 byte */ +#define STORE_BUF_SIZE 236 +/* The hardware requires at least 200byte output buffers */ +#define SW_STOREBUF_TH 200 +/* The 38KB offset in ctx_buf is used as the internal buffer */ +#define CTX_STOREBUF_OFFSET 0x9800 + enum alg_type { HW_DEFLATE = 0x1, HW_ZLIB, @@ -110,6 +117,23 @@ enum lz77_compress_status { COMP_BLK, }; +struct hisi_comp_buf { + /* Denoted whether the output is copied from the storage buffer */ + bool skip_hw; + /* Denoted internal store buf */ + __u8 dst[STORE_BUF_SIZE]; + /* Denoted data size left in uadk */ + __u32 pending_out; + /* Size that have been copied */ + __u32 output_offset; + /* Input data consumption when all data copies are complete */ + __u32 fallback_size; + /* Store end flag return by HW */ + __u32 status; + /* Denoted internal store sgl */ + struct wd_datalist list_dst; +}; + struct hisi_zip_sqe { __u32 consumed; __u32 produced; @@ -214,6 +238,127 @@ static int buf_size_check_deflate(__u32 *in_size, __u32 *out_size) return 0; } +static __u32 copy_to_out(struct wd_comp_msg *msg, struct hisi_comp_buf *buf, __u32 total_len) +{ + struct wd_comp_req *req = &msg->req; + struct wd_datalist *node = req->list_dst; + __u32 sgl_restlen, copy_len; + __u32 len = 0, sgl_cplen = 0; + + copy_len = total_len > req->dst_len ? + req->dst_len : total_len; + sgl_restlen = copy_len; + + if (req->data_fmt == WD_FLAT_BUF) { + memcpy(req->dst, buf->dst + buf->output_offset, copy_len); + return copy_len; + } + + while (node != NULL && sgl_restlen > 0) { + len = node->len > sgl_restlen ? sgl_restlen : node->len; + memcpy(node->data, buf->list_dst.data + buf->output_offset + sgl_cplen, + len); + sgl_restlen -= len; + sgl_cplen += len; + node = node->next; + } + + return sgl_cplen; +} + +static int check_store_buf(struct wd_comp_msg *msg) +{ + struct wd_comp_req *req = &msg->req; + struct hisi_comp_buf *buf; + __u32 copy_len; + + if (!msg->ctx_buf) + return 0; + + buf = (struct hisi_comp_buf *)(msg->ctx_buf + CTX_STOREBUF_OFFSET); + if (!buf->pending_out) + return 0; + + if (!req->src_len && buf->fallback_size) { + WD_ERR("Couldn't handle, input size is 0!\n"); + return -WD_EINVAL; + } + + copy_len = copy_to_out(msg, buf, buf->pending_out); + buf->pending_out -= copy_len; + msg->produced = copy_len; + buf->skip_hw = true; + + if (!buf->pending_out) { + /* All data copied to output */ + msg->in_cons = buf->fallback_size; + buf->fallback_size = 0; + buf->output_offset = 0; + memset(buf->dst, 0, STORE_BUF_SIZE); + req->status = buf->status == WD_STREAM_END ? WD_STREAM_END : WD_SUCCESS; + } else { + /* Still data need to be copied */ + buf->output_offset += copy_len; + req->status = WD_SUCCESS; + } + + return 1; +} + +static void copy_from_hw(struct wd_comp_msg *msg, struct hisi_comp_buf *buf) +{ + struct wd_comp_req *req = &msg->req; + __u32 copy_len; + + copy_len = copy_to_out(msg, buf, msg->produced); + buf->pending_out = msg->produced - copy_len; + msg->produced = copy_len; + + if (!buf->pending_out) { + /* All data copied to output */ + buf->fallback_size = 0; + buf->output_offset = 0; + memset(buf->dst, 0, STORE_BUF_SIZE); + } else { + /* Still data need to be copied */ + buf->output_offset += copy_len; + + /* Feedback to users that a maximum of 1 byte of data is not consumed */ + if (msg->in_cons > 1) { + buf->fallback_size = 1; + msg->in_cons--; + } else { + buf->fallback_size = msg->in_cons; + msg->in_cons = 0; + } + + /* + * The end flag is cached. It can be output only + * after the data is completely copied to the output. + */ + if (req->status == WD_STREAM_END) { + buf->status = WD_STREAM_END; + req->status = WD_SUCCESS; + } + } +} + +static int check_enable_store_buf(struct wd_comp_msg *msg, __u32 out_size, int head_size) +{ + if (msg->stream_mode != WD_COMP_STATEFUL) + return 0; + + if (msg->stream_pos != WD_COMP_STREAM_NEW && out_size > SW_STOREBUF_TH) + return 0; + + if (msg->stream_pos == WD_COMP_STREAM_NEW && + out_size - head_size > SW_STOREBUF_TH) + return 0; + + /* 1 mean it need store buf */ + return 1; +} + static void fill_buf_size_deflate(struct hisi_zip_sqe *sqe, __u32 in_size, __u32 out_size) { @@ -238,11 +383,29 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, { __u32 in_size = msg->req.src_len; __u32 out_size = msg->avail_out; + struct hisi_comp_buf *buf; void *src = msg->req.src; void *dst = msg->req.dst; void *ctx_buf = NULL; int ret; + /* + * When the output buffer is smaller than the SW_STOREBUF_TH in STATEFUL, + * the internal buffer is used. + */ + ret = check_enable_store_buf(msg, out_size, head_size); + if (ret) { + if (!msg->ctx_buf) { + WD_ERR("ctx_buf is NULL when out_size is less than 200!\n"); + return -WD_EINVAL; + } + + buf = (struct hisi_comp_buf *)(msg->ctx_buf + CTX_STOREBUF_OFFSET); + dst = buf->dst; + out_size = STORE_BUF_SIZE; + buf->pending_out = STORE_BUF_SIZE; + } + if (msg->stream_pos == WD_COMP_STREAM_NEW && head != NULL) { if (msg->req.op_type == WD_DIR_COMPRESS) { memcpy(dst, head, head_size); @@ -296,9 +459,9 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) } static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_comp_msg *msg) + struct wd_datalist *list_src, + struct wd_datalist *list_dst) { - struct wd_comp_req *req = &msg->req; void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool; @@ -308,13 +471,13 @@ static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, return -WD_EINVAL; } - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_src); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, list_src); if (unlikely(!hw_sgl_in)) { WD_ERR("failed to get hw sgl in!\n"); return -WD_ENOMEM; } - hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_dst); + hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, list_dst); if (unlikely(!hw_sgl_out)) { WD_ERR("failed to get hw sgl out!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); @@ -345,15 +508,37 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, int head_size) { struct wd_comp_req *req = &msg->req; + struct wd_datalist *list_src = req->list_src; + struct wd_datalist *list_dst = req->list_dst; __u32 out_size = msg->avail_out; __u32 in_size = req->src_len; + struct hisi_comp_buf *buf; __u32 src_skip = 0; __u32 dst_skip = 0; int ret; + /* + * When the output buffer is smaller than the SW_STOREBUF_TH in STATEFUL, + * the internal buffer is used. + */ + ret = check_enable_store_buf(msg, out_size, head_size); + if (ret) { + if (!msg->ctx_buf) { + WD_ERR("ctx_buf is NULL when out_size is less than 200!\n"); + return -WD_EINVAL; + } + + buf = (struct hisi_comp_buf *)(msg->ctx_buf + CTX_STOREBUF_OFFSET); + buf->pending_out = STORE_BUF_SIZE; + buf->list_dst.data = buf->dst; + buf->list_dst.len = STORE_BUF_SIZE; + list_dst = &buf->list_dst; + out_size = STORE_BUF_SIZE; + } + fill_buf_type_sgl(sqe); - ret = fill_buf_addr_deflate_sgl(h_qp, sqe, msg); + ret = fill_buf_addr_deflate_sgl(h_qp, sqe, list_src, list_dst); if (unlikely(ret)) return ret; @@ -943,6 +1128,11 @@ static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *com __u16 count = 0; int ret; + /* Skip hardware, if the store buffer need to be copied to output */ + ret = check_store_buf(msg); + if (ret) + return ret < 0 ? ret : 0; + hisi_set_msg_id(h_qp, &msg->tag); ret = fill_zip_comp_sqe(qp, msg, &sqe); if (unlikely(ret < 0)) { @@ -1082,16 +1272,37 @@ static int hisi_zip_comp_recv(struct wd_alg_driver *drv, handle_t ctx, void *com { struct hisi_qp *qp = wd_ctx_get_priv(ctx); struct wd_comp_msg *recv_msg = comp_msg; + struct hisi_comp_buf *buf = NULL; handle_t h_qp = (handle_t)qp; struct hisi_zip_sqe sqe = {0}; __u16 count = 0; int ret; + if (recv_msg && recv_msg->ctx_buf) { + buf = (struct hisi_comp_buf *)(recv_msg->ctx_buf + CTX_STOREBUF_OFFSET); + /* + * The output has been copied from the storage buffer, + * and no data need to be received. + */ + if (buf->skip_hw) { + buf->skip_hw = false; + return 0; + } + } + ret = hisi_qm_recv(h_qp, &sqe, 1, &count); if (unlikely(ret < 0)) return ret; - return parse_zip_sqe(qp, &sqe, recv_msg); + ret = parse_zip_sqe(qp, &sqe, recv_msg); + if (unlikely(ret < 0 || recv_msg->req.status == WD_IN_EPARA)) + return ret; + + /* There are data in buf, copy to output */ + if (buf && buf->pending_out) + copy_from_hw(recv_msg, buf); + + return 0; } #define GEN_ZIP_ALG_DRIVER(zip_alg_name) \ -- 2.33.0