[PATCH 01/30] uadk: add reserved memory handling functionality to uadk
From: Longfang Liu <liulongfang@huawei.com> Add kernel-state reserved memory handling to the UADK SVA framework to adapt to the No-SVA feature, including functions to request and initialize a reserved memory pool and to allocate, use, and free memory within the pool. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- Makefile.am | 9 +- include/uacce.h | 7 + include/wd.h | 23 + include/wd_alg_common.h | 36 +- include/wd_bmm.h | 44 ++ include/wd_internal.h | 70 +++ include/wd_util.h | 2 + libwd.map | 14 + wd.c | 38 +- wd_bmm.c | 1057 +++++++++++++++++++++++++++++++++++++++ wd_util.c | 107 +++- 11 files changed, 1355 insertions(+), 52 deletions(-) create mode 100644 include/wd_bmm.h create mode 100644 include/wd_internal.h create mode 100644 wd_bmm.c diff --git a/Makefile.am b/Makefile.am index f897533..0e1203a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,12 +32,13 @@ YEAR = 2025 AM_CFLAGS+= -DUADK_VERSION_NUMBER="\"UADK version: ${MAJOR}.${MINOR}.${REVISION}\"" AM_CFLAGS+= -DUADK_RELEASED_TIME="\"Released ${MONTH} ${DAY}, ${YEAR}\"" -pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ +pkginclude_HEADERS = include/wd.h include/wd_internal.h include/wd_cipher.h include/wd_aead.h \ include/wd_comp.h include/wd_dh.h include/wd_digest.h \ include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ - include/wd_udma.h include/wd_join_gather.h + include/wd_udma.h include/wd_join_gather.h \ + include/wd_bmm.h nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h @@ -48,7 +49,7 @@ uadk_driversdir=$(libdir)/uadk uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ libisa_ce.la libisa_sve.la libhisi_dae.la libhisi_udma.la -libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ +libwd_la_SOURCES=wd.c wd_mempool.c wd_bmm.c wd_bmm.h wd.h wd_alg.c wd_alg.h \ v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ v1/wd_rsa.c v1/wd_rsa.h \ v1/wd_aead.c v1/wd_aead.h \ @@ -126,7 +127,7 @@ libwd_comp_la_DEPENDENCIES = libwd.la libhisi_zip_la_LIBADD = -ldl -libwd_crypto_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma +libwd_crypto_la_LIBADD = -lwd -ldl -lnuma -lm -lpthread libwd_crypto_la_DEPENDENCIES = libwd.la libwd_udma_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread diff --git a/include/uacce.h b/include/uacce.h index f7fae27..c6bb4fb 100644 --- a/include/uacce.h +++ b/include/uacce.h @@ -15,6 +15,12 @@ extern "C" { #define UACCE_CMD_START _IO('W', 0) #define UACCE_CMD_PUT_Q _IO('W', 1) +#define UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long) + +/* Pass DMA SS region slice size by granularity 64KB */ +#define UACCE_GRAN_SIZE 0x10000ull +#define UACCE_GRAN_SHIFT 16 +#define UACCE_GRAN_NUM_MASK 0xfffull /** * UACCE Device flags: @@ -33,6 +39,7 @@ enum { enum uacce_qfrt { UACCE_QFRT_MMIO = 0, /* device mmio region */ UACCE_QFRT_DUS = 1, /* device user share */ + UACCE_QFRT_SS, /* static share memory */ UACCE_QFRT_MAX, }; diff --git a/include/wd.h b/include/wd.h index b62d355..b97e5c7 100644 --- a/include/wd.h +++ b/include/wd.h @@ -38,6 +38,7 @@ typedef unsigned long long __u64; /* Required compiler attributes */ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define handle_t uintptr_t typedef
struct wd_dev_mask wd_dev_mask_t; @@ -115,6 +116,28 @@ enum wd_alg_type { WD_AEAD, }; +/* Memory APIs for UADK API Layer */ +typedef void *(*wd_alloc)(void *usr, size_t size); +typedef void (*wd_free)(void *usr, void *va); + + /* Memory VA to DMA address map and unmap */ +typedef void *(*wd_map)(void *usr, void *va, size_t sz); +typedef void (*wd_unmap)(void *usr, void *va, void *dma, size_t sz); +typedef __u32 (*wd_bufsize)(void *usr); + +/* Memory from user, it is given at ctx creating. */ +struct wd_mm_ops { + wd_alloc alloc; /* Memory allocation */ + wd_free free; /* Memory free */ + wd_map iova_map; /* Get iova from user space VA */ + + /* Destroy the mapping between the PA of VA and iova */ + wd_unmap iova_unmap; + wd_bufsize get_bufsize; /* Optional */ + void *usr; /* Data for the above operations */ + bool sva_mode; /* Record whether the OS is SVA or No-SVA mode */ +}; + /* * If the actual size of data is inconsistent * with dsize, undefined behavior occurs. diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index fd77426..a294877 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -12,6 +12,7 @@ #include <numa.h> #include "wd.h" #include "wd_alg.h" +#include "wd_internal.h" #ifdef __cplusplus extern "C" { @@ -24,7 +25,6 @@ extern "C" { #define BITS_TO_BYTES(bits) (((bits) + 7) >> 3) #define BYTES_TO_BITS(bytes) ((bytes) << 3) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define MAX_STR_LEN 256 #define CTX_TYPE_INVALID 9999 #define POLL_TIME 1000 @@ -60,6 +60,13 @@ enum wd_init_type { WD_TYPE_V2, }; +enum wd_mem_type { + UADK_MEM_AUTO, + UADK_MEM_USER, + UADK_MEM_PROXY, + UADK_MEM_MAX, +}; + /* * struct wd_ctx - Define one ctx and related type. * @ctx: The ctx itself. @@ -132,27 +139,6 @@ struct wd_ctx_params { struct wd_cap_config *cap; }; -struct wd_soft_ctx { - void *priv; -}; - -struct wd_ctx_internal { - handle_t ctx; - __u8 op_type; - __u8 ctx_mode; - __u16 sqn; - pthread_spinlock_t lock; -}; - -struct wd_ctx_config_internal { - __u32 ctx_num; - int shmid; - struct wd_ctx_internal *ctxs; - void *priv; - bool epoll_en; - unsigned long *msg_cnt; -}; - /* * struct wd_comp_sched - Define a scheduler. * @name: Name of this scheduler. @@ -181,12 +167,6 @@ struct wd_sched { typedef int (*wd_alg_init)(struct wd_ctx_config *config, struct wd_sched *sched); typedef int (*wd_alg_poll_ctx)(__u32 idx, __u32 expt, __u32 *count); -struct wd_datalist { - void *data; - __u32 len; - struct wd_datalist *next; -}; - #ifdef __cplusplus } #endif diff --git a/include/wd_bmm.h b/include/wd_bmm.h new file mode 100644 index 0000000..76b56a0 --- /dev/null +++ b/include/wd_bmm.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +#ifndef _WD_SVA_BMM_H +#define _WD_SVA_BMM_H + +#include <stdint.h> +#include "wd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Memory pool creating parameters */ +struct wd_mempool_setup { + __u32 block_size; /* Block buffer size */ + __u32 block_num; /* Block buffer number */ + __u32 align_size; /* Block buffer starting address align size */ + struct wd_mm_ops ops; /* memory from user if don't use UADK memory */ +}; + +void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup); +void wd_mempool_free(handle_t h_ctx, void *pool); +void *wd_mem_alloc(void *pool, size_t size); +void wd_mem_free(void *pool, void *buf); + +void *wd_mem_map(void *pool, void *buf, size_t sz); +void wd_mem_unmap(void *pool, void *buf_dma, void *buf, size_t sz); +int wd_get_free_num(void *pool, __u32 *free_num); +int wd_get_fail_num(void *pool, __u32 *fail_num); +__u32 wd_get_bufsize(void *pool); + +handle_t wd_find_ctx(const char *alg_name); +void wd_remove_ctx_list(void); +int wd_insert_ctx_list(handle_t h_ctx, char *alg_name); +__u32 wd_get_dev_id(void *pool); + +#ifdef __cplusplus +} +#endif + +#endif /* _WD_SVA_BMM_H */ diff --git a/include/wd_internal.h b/include/wd_internal.h new file mode 100644 index 0000000..cd90ebf --- /dev/null +++ b/include/wd_internal.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. + */ + +#ifndef WD_INTERNAL_H +#define WD_INTERNAL_H + +#include <pthread.h> +#include <stdbool.h> +#include "wd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DECIMAL_NUMBER 10 +#define MAX_FD_NUM 65535 + +struct wd_ctx_h { + int fd; + char dev_path[MAX_DEV_NAME_LEN]; + char *dev_name; + char *drv_name; + unsigned long qfrs_offs[UACCE_QFRT_MAX]; + void *qfrs_base[UACCE_QFRT_MAX]; + struct uacce_dev *dev; + void *priv; +}; + +struct wd_soft_ctx { + int fd; + void *priv; +}; + +struct wd_ce_ctx { + int fd; + char *drv_name; + void *priv; +}; + +struct wd_ctx_internal { + handle_t ctx; + __u8 op_type; + __u8 ctx_mode; + __u16 sqn; + pthread_spinlock_t lock; +}; + +struct wd_ctx_config_internal { + __u32 ctx_num; + int shmid; + struct wd_ctx_internal *ctxs; + void *priv; + bool epoll_en; + unsigned long *msg_cnt; + char *alg_name; +}; + +struct wd_datalist { + void *data; + __u32 len; + struct wd_datalist *next; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/wd_util.h b/include/wd_util.h index 4a5204d..a337284 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -553,6 +553,8 @@ static inline void wd_ctx_spin_unlock(struct wd_ctx_internal *ctx, int type) pthread_spin_unlock(&ctx->lock); } +int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type); + #ifdef __cplusplus } #endif diff --git a/libwd.map b/libwd.map index 5522ec0..b1b90b3 100644 --- a/libwd.map +++ b/libwd.map @@ -49,5 +49,19 @@ global: wd_enable_drv; wd_disable_drv; wd_get_alg_head; + + wd_find_ctx; + wd_get_dev_id; + wd_remove_ctx_list; + wd_insert_ctx_list; + wd_mempool_alloc; + wd_mempool_free; + wd_mem_alloc; + wd_mem_free; + wd_mem_map; + wd_mem_unmap; + wd_get_free_num; + wd_get_fail_num; + wd_get_bufsize; local: *; }; diff --git a/wd.c b/wd.c index c1cc282..3e867b6 100644 --- a/wd.c +++ b/wd.c @@ -20,6 +20,7 @@ #include "wd.h" #include "wd_alg.h" +#include "wd_internal.h" #define SYS_CLASS_DIR "/sys/class/uacce" #define FILE_MAX_SIZE (8 << 20) @@ -33,16 +34,18 @@ enum UADK_LOG_LEVEL { static int uadk_log_level = WD_LOG_INVALID; -struct wd_ctx_h { - int fd; - char 
dev_path[MAX_DEV_NAME_LEN]; - char *dev_name; - char *drv_name; - unsigned long qfrs_offs[UACCE_QFRT_MAX]; - void *qfrs_base[UACCE_QFRT_MAX]; - struct uacce_dev *dev; - void *priv; -}; +static int wd_check_ctx_type(handle_t h_ctx) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + + /* A simple and efficient method to check the queue type */ + if (ctx->fd < 0 || ctx->fd > MAX_FD_NUM) { + WD_INFO("Invalid: this ctx not HW ctx.\n"); + return -WD_HW_EACCESS; + } + + return 0; +} static void wd_parse_log_level(void) { @@ -446,7 +449,7 @@ void wd_release_ctx(handle_t h_ctx) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; - if (!ctx) + if (!ctx || wd_check_ctx_type(h_ctx)) return; close(ctx->fd); @@ -461,7 +464,7 @@ int wd_ctx_start(handle_t h_ctx) struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; int ret; - if (!ctx) + if (!ctx || wd_check_ctx_type(h_ctx)) return -WD_EINVAL; ret = wd_ctx_set_io_cmd(h_ctx, UACCE_CMD_START, NULL); @@ -527,6 +530,7 @@ void wd_ctx_unmap_qfr(handle_t h_ctx, enum uacce_qfrt qfrt) unsigned long wd_ctx_get_region_size(handle_t h_ctx, enum uacce_qfrt qfrt) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + if (!ctx || qfrt >= UACCE_QFRT_MAX) return 0; return ctx->qfrs_offs[qfrt]; @@ -585,8 +589,16 @@ int wd_ctx_wait(handle_t h_ctx, __u16 ms) int wd_is_sva(handle_t h_ctx) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + int ret; - if (!ctx || !ctx->dev) + if (!ctx) + return -WD_EINVAL; + + ret = wd_check_ctx_type(h_ctx); + if (ret) + return ret; + + if (!ctx->dev) return -WD_EINVAL; if ((unsigned int)ctx->dev->flags & UACCE_DEV_SVA) diff --git a/wd_bmm.c b/wd_bmm.c new file mode 100644 index 0000000..21c46ca --- /dev/null +++ b/wd_bmm.c @@ -0,0 +1,1057 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +/* Block Memory Management (lib): Adapted for SVA mode */ +#define _GNU_SOURCE +#include <dirent.h> +#include <numa.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "wd_internal.h" +#include "wd_bmm.h" +#include "uacce.h" +#include "wd.h" + +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define UACCE_DEV_IOMMU (1<<7) + +#define TAG_FREE 0x12345678 /* block is free */ +#define TAG_USED 0x87654321 /* block is busy */ +#define MAX_ALIGN_SIZE 0x1000 /* 4KB */ +#define MAX_BLOCK_SIZE 0x10000000 /* 256MB */ +#define BLK_BALANCE_SZ 0x100000ul +#define NUM_TIMES(x) (87 * (x) / 100) + +#define BYTE_SIZE 8 +#define BIT_SHIFT 3 + +struct wd_ss_region { + unsigned long long pa; + void *va; + size_t size; + TAILQ_ENTRY(wd_ss_region) next; +}; +TAILQ_HEAD(wd_ss_region_list, wd_ss_region); + +struct ctx_info { + int fd; + int iommu_type; + void *ss_va; + size_t ss_mm_size; + struct wd_ss_region_list ss_list; + struct wd_ss_region_list *head; + unsigned long qfrs_offset[UACCE_QFRT_MAX]; +}; + +struct wd_blk_hd { + unsigned int blk_tag; + unsigned int blk_num; + void *blk_dma; + void *blk; +}; + +struct wd_blkpool { + pthread_spinlock_t pool_lock; + unsigned int free_blk_num; + unsigned int alloc_failures; + struct ctx_info *cinfo; + struct wd_blk_hd *blk_array; // memory blk array + unsigned int total_blocks; // total blk number + unsigned char *free_bitmap; // free blk bitmap, 0 means unused + unsigned int bitmap_size; // bitmap's memory size + void *usr_mem_start; + void *act_start; + unsigned int act_hd_sz; + unsigned int act_blk_sz; + unsigned long act_mem_sz; + unsigned int dev_id; + struct wd_mempool_setup setup; +}; + +struct mem_ctx_node { + char alg_name[CRYPTO_MAX_ALG_NAME]; + handle_t h_ctx; + int numa_id; + bool used; + TAILQ_ENTRY(mem_ctx_node) list_node; +}; +static TAILQ_HEAD(, mem_ctx_node) g_mem_ctx_list = TAILQ_HEAD_INITIALIZER(g_mem_ctx_list); +static pthread_mutex_t g_mem_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; + +handle_t wd_find_ctx(const char *alg_name) +{ + struct mem_ctx_node *close_node = NULL; + struct mem_ctx_node *node; + int min_distance = 0xFFFF; + int cpu = sched_getcpu(); + int nid = numa_node_of_cpu(cpu); + handle_t h_ctx = 0; + int numa_dis; + + if (!alg_name) { + WD_ERR("Invalid: alg_name is NULL!\n"); + return 0; + } + + pthread_mutex_lock(&g_mem_ctx_mutex); + TAILQ_FOREACH(node, &g_mem_ctx_list, list_node) { + if (node->used == false && strstr(node->alg_name, alg_name)) { + if (node->numa_id == nid) { + h_ctx = node->h_ctx; + node->used = true; + break; + } + + /* Query the queue with the shortest NUMA distance */ + numa_dis = numa_distance(nid, node->numa_id); + if (numa_dis < min_distance) { + min_distance = numa_dis; + close_node = node; + } + } + } + + /* If no ctx matches the NUMA ID, use the shortest-distance ctx instead */ + if (!h_ctx && close_node) { + h_ctx = close_node->h_ctx; + close_node->used = true; + } + pthread_mutex_unlock(&g_mem_ctx_mutex); + + if (!h_ctx) + WD_ERR("Failed to find mem ctx for alg: %s\n", alg_name); + + return h_ctx; +} + +void wd_remove_ctx_list(void) +{ + struct mem_ctx_node *node; + + pthread_mutex_lock(&g_mem_ctx_mutex); + /* Free all list nodes */ + while ((node = TAILQ_FIRST(&g_mem_ctx_list)) != NULL) { + /* Use TAILQ_REMOVE to remove list node
*/ + TAILQ_REMOVE(&g_mem_ctx_list, node, list_node); + free(node); + } + + pthread_mutex_unlock(&g_mem_ctx_mutex); +} + +int wd_insert_ctx_list(handle_t h_ctx, char *alg_name) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct mem_ctx_node *new_node; + int numa_id; + + if (!alg_name || !h_ctx) { + WD_ERR("Invalid: input params is NULL!\n"); + return -WD_EINVAL; + } + + /* A simple and efficient method to check the queue type */ + if (ctx->fd < 0 || ctx->fd > MAX_FD_NUM) { + WD_INFO("Invalid ctx: this ctx not HW ctx.\n"); + return 0; + } + + numa_id = ctx->dev->numa_id; + new_node = malloc(sizeof(struct mem_ctx_node)); + if (new_node) { + pthread_mutex_lock(&g_mem_ctx_mutex); + strncpy(new_node->alg_name, alg_name, CRYPTO_MAX_ALG_NAME - 1); + new_node->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + new_node->numa_id = numa_id; + new_node->h_ctx = h_ctx; + new_node->used = false; + TAILQ_INSERT_TAIL(&g_mem_ctx_list, new_node, list_node); + pthread_mutex_unlock(&g_mem_ctx_mutex); + return 0; + } + + return -WD_ENOMEM; +} + +static void wd_free_slice(struct ctx_info *cinfo) +{ + struct wd_ss_region *rgn; + + while (true) { + rgn = TAILQ_FIRST(&cinfo->ss_list); + if (!rgn) + break; + TAILQ_REMOVE(&cinfo->ss_list, rgn, next); + free(rgn); + } +} + +static void wd_add_slice(struct ctx_info *cinfo, struct wd_ss_region *rgn) +{ + struct wd_ss_region *rg; + + rg = TAILQ_LAST(&cinfo->ss_list, wd_ss_region_list); + if (rg) { + if (rg->pa + rg->size == rgn->pa) { + rg->size += rgn->size; + free(rgn); + return; + } + } + + TAILQ_INSERT_TAIL(&cinfo->ss_list, rgn, next); +} + +static void wd_show_ss_slices(struct ctx_info *cinfo) +{ + struct wd_ss_region *rgn; + int i = 0; + + TAILQ_FOREACH(rgn, cinfo->head, next) { + WD_ERR("slice-%d:size = 0x%lx\n", i, rgn->size); + i++; + } +} + +static void bitmap_set_bit(unsigned char *bitmap, unsigned int bit_index) +{ + if (!bitmap) + return; + + bitmap[bit_index >> BIT_SHIFT] |= (1 << (bit_index % BYTE_SIZE)); +} + +static void bitmap_clear_bit(unsigned char *bitmap, unsigned int bit_index) +{ + if (!bitmap) + return; + + bitmap[bit_index >> BIT_SHIFT] &= ~(1 << (bit_index % BYTE_SIZE)); +} + +static bool bitmap_test_bit(const unsigned char *bitmap, unsigned int bit_index) +{ + if (!bitmap) + return false; + + /* bit is 1, it indicates that the block has already been used and is not free */ + if ((bitmap[bit_index >> BIT_SHIFT] >> (bit_index % BYTE_SIZE)) & 0x1) + return false; + + return true; +} + +static void *wd_mmap_qfr(struct ctx_info *cinfo, enum uacce_qfrt qfrt, size_t size) +{ + off_t off; + + off = qfrt * getpagesize(); + + return mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED, cinfo->fd, off); +} + +static void wd_unmap_reserve_mem(void *addr, size_t size) +{ + int ret; + + if (!addr) + return; + + ret = munmap(addr, size); + if (ret) + WD_ERR("wd qfr unmap failed!\n"); +} + +static void *wd_map_reserve_mem(struct wd_blkpool *pool, size_t size) +{ + struct ctx_info *cinfo = pool->cinfo; + struct wd_ss_region *rgn; + unsigned long info; + size_t tmp = size; + unsigned long i = 0; + void *ptr; + int ret = 1; + + if (!cinfo) { + WD_ERR("ctx queue information is NULL!\n"); + return NULL; + } + + /* Make sure memory map granularity size align */ + if (!cinfo->iommu_type) + tmp = ALIGN(tmp, UACCE_GRAN_SIZE); + + ptr = wd_mmap_qfr(cinfo, UACCE_QFRT_SS, tmp); + if (ptr == MAP_FAILED) { + WD_ERR("wd drv mmap fail!(err = %d)\n", errno); + return NULL; + } + + cinfo->ss_va = ptr; + cinfo->ss_mm_size = tmp; + tmp = 0; + while (ret > 0) { + info = i; + 
ret = ioctl(cinfo->fd, UACCE_CMD_GET_SS_DMA, &info); + if (ret < 0) { + wd_show_ss_slices(cinfo); + WD_ERR("get DMA fail!\n"); + goto err_out; + } + + rgn = malloc(sizeof(*rgn)); + if (!rgn) { + WD_ERR("alloc ss region fail!\n"); + goto err_out; + } + memset(rgn, 0, sizeof(*rgn)); + + if (cinfo->iommu_type) + rgn->size = cinfo->ss_mm_size; + else + rgn->size = (info & UACCE_GRAN_NUM_MASK) << + UACCE_GRAN_SHIFT; + rgn->pa = info & (~UACCE_GRAN_NUM_MASK); + rgn->va = ptr + tmp; + tmp += rgn->size; + wd_add_slice(cinfo, rgn); + + i++; + } + + return ptr; + +err_out: + wd_free_slice(cinfo); + wd_unmap_reserve_mem(cinfo->ss_va, cinfo->ss_mm_size); + + return NULL; +} + +static int wd_pool_params_check(struct wd_mempool_setup *setup) +{ + if (!setup->block_num || !setup->block_size || + setup->block_size > MAX_BLOCK_SIZE) { + WD_ERR("Invalid: block_size or block_num(%x, %u)!\n", + setup->block_size, setup->block_num); + return -WD_EINVAL; + } + + /* Check parameters, and align_size must be 2^N */ + if (setup->align_size <= 0x1 || setup->align_size > MAX_ALIGN_SIZE || + (setup->align_size & (setup->align_size - 0x1))) { + WD_ERR("Invalid align_size.\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_ctx_info_init(struct wd_ctx_h *ctx, struct wd_blkpool *p) +{ + struct ctx_info *cinfo; + + cinfo = calloc(1, sizeof(struct ctx_info)); + if (!cinfo) { + WD_ERR("failed to alloc ctx info memory.\n"); + return -WD_ENOMEM; + } + + cinfo->fd = ctx->fd; + cinfo->iommu_type = (unsigned int)ctx->dev->flags & UACCE_DEV_IOMMU; + cinfo->head = &cinfo->ss_list; + TAILQ_INIT(&cinfo->ss_list); + (void)memcpy(cinfo->qfrs_offset, ctx->qfrs_offs, + sizeof(cinfo->qfrs_offset)); + p->cinfo = (void *)cinfo; + + return 0; +} + +static int wd_pool_pre_layout(handle_t h_ctx, + struct wd_blkpool *p, + struct wd_mempool_setup *sp) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct ctx_info *cinfo = NULL; + unsigned int asz; + int ret; + + if (!ctx && !sp->ops.alloc) { + WD_ERR("ctx is NULL!\n"); + return -WD_EINVAL; + } + + if (!sp->ops.alloc) { + ret = wd_ctx_info_init(ctx, p); + if (ret) { + WD_ERR("failed to init ctx info.\n"); + return ret; + } + cinfo = p->cinfo; + } + + ret = wd_pool_params_check(sp); + if (ret) { + free(p->cinfo); + p->cinfo = NULL; + return ret; + } + + asz = sp->align_size; + + /* Get actual value by align */ + p->act_hd_sz = ALIGN(sizeof(struct wd_blk_hd), asz); + p->act_blk_sz = ALIGN(sp->block_size, asz); + p->act_mem_sz = (p->act_hd_sz + p->act_blk_sz) * + (unsigned long)sp->block_num + asz; + + /* + * When we use WD reserve memory and the blk_sz is larger than 1M, + * in order to ensure the mem_pool to be success, + * ensure that the allocated memory is an integer multiple of 1M. + */ + if (!sp->ops.alloc && (cinfo && !cinfo->iommu_type)) + p->act_mem_sz = ((p->act_mem_sz + BLK_BALANCE_SZ - 1) & ~(BLK_BALANCE_SZ - 1)) << 1; + + return WD_SUCCESS; +} + +/** + * wd_iova_map - Map virtual address to physical address + * @cinfo: context information + * @va: virtual address to map + * @sz: size of the mapping (not used in current implementation) + * + * When IOMMU is enabled, the PA is actually an IOVA; userspace still sees it + * as consistent and contiguous with the VA. + * When IOMMU is disabled, the PA refers to the kernel's physical address, which + * must be physically contiguous to be allocated by the kernel. + * Therefore, the PA address can be obtained from the offset of the VA. 
+ * + */ +static void *wd_iova_map(struct ctx_info *cinfo, void *va, size_t sz) +{ + struct wd_ss_region *rgn; + unsigned long offset; + void *dma_addr; + + if (!cinfo || !va) { + WD_ERR("wd iova map: parameter err!\n"); + return NULL; + } + + /* Search through all memory regions to find where va belongs */ + TAILQ_FOREACH(rgn, cinfo->head, next) { + if (rgn->va <= va && va < rgn->va + rgn->size) { + /* Calculate offset within the region */ + offset = (uintptr_t)va - (uintptr_t)rgn->va; + /* Add base physical address of the region */ + dma_addr = (void *)((uintptr_t)rgn->pa + offset); + return dma_addr; + } + } + + WD_ERR("wd iova map: va not found in any region\n"); + return NULL; +} + +/** + * wd_iova_unmap - Unmap physical address (no-op in non-IOMMU mode) + * @cinfo: context information + * @va: virtual address + * @dma: physical address + * @sz: size of the mapping (not used in current implementation) + * + * In non-IOMMU mode, this function does nothing as there's no need to unmap. + * In IOMMU mode, this would typically involve unmapping the DMA address. + */ +static void wd_iova_unmap(struct ctx_info *cinfo, void *va, void *dma, size_t sz) +{ + /* For no-iommu, dma-unmap doing nothing */ +} + +static void wd_pool_uninit(struct wd_blkpool *p) +{ + struct ctx_info *cinfo = p->cinfo; + struct wd_blk_hd *fhd = NULL; + unsigned long block_size; + unsigned int i; + + block_size = (unsigned long)p->act_hd_sz + p->act_blk_sz; + /* Clean up the allocated resources. */ + for (i = 0; i < p->total_blocks; i++) { + /* Release the previously allocated blocks. */ + fhd = &p->blk_array[i]; + wd_iova_unmap(cinfo, fhd->blk, fhd->blk_dma, block_size); + } + + free(p->free_bitmap); + p->free_bitmap = NULL; + free(p->blk_array); + p->blk_array = NULL; +} + +static int wd_pool_init(struct wd_blkpool *p) +{ + struct ctx_info *cinfo = p->cinfo; + __u32 blk_size = p->setup.block_size; + void *dma_start, *dma_end, *va; + struct wd_blk_hd *fhd = NULL; + struct wd_blk_hd *hd = NULL; + unsigned int i, j, act_num; + unsigned long block_size; + unsigned int dma_num = 0; + + p->act_start = (void *)ALIGN((uintptr_t)p->usr_mem_start, + p->setup.align_size); + + /* Calculate the actual number of allocatable blocks */ + block_size = (unsigned long)(p->act_hd_sz + p->act_blk_sz); + if (block_size == 0) { + WD_ERR("Invalid block size with header.\n"); + return -WD_EINVAL; + } + act_num = p->act_mem_sz / block_size; + if (!act_num) { + WD_ERR("Invalid memory size.\n"); + return -WD_EINVAL; + } + + /* Allocate block array */ + p->blk_array = (struct wd_blk_hd *)malloc(act_num * sizeof(struct wd_blk_hd)); + if (!p->blk_array) { + WD_ERR("Failed to allocate block array.\n"); + return -WD_ENOMEM; + } + + /* Allocate bitmap */ + p->total_blocks = act_num; + p->bitmap_size = (act_num + BYTE_SIZE - 1) >> BIT_SHIFT; + p->free_bitmap = (unsigned char *)calloc(1, p->bitmap_size); + if (!p->free_bitmap) { + WD_ERR("Failed to allocate free bitmap.\n"); + goto bitmap_error; + } + + /* Initialize all blocks. */ + for (i = 0; i < act_num; i++) { + /* Calculate the virtual address of the current block. */ + va = (void *)((uintptr_t)p->act_start + block_size * i); + + /* Get the physical address. */ + dma_start = wd_iova_map(cinfo, va, 0); + dma_end = wd_iova_map(cinfo, va + blk_size - 1, 0); + if (!dma_start || !dma_end) { + WD_ERR("wd_iova_map err.\n"); + /* Clean up the allocated resources. */ + goto init_blk_error; + } + + /* Check whether the physical addresses are contiguous. 
*/ + if ((uintptr_t)dma_end - (uintptr_t)dma_start != blk_size - 1) { + /* If OS kernel is not open SMMU, need to check dma address */ + WD_INFO("wd dma address not continuous.\n"); + /* Mark as unavailable, bit value is 1. */ + bitmap_set_bit(p->free_bitmap, i); + continue; + } + + /* Initialize the block. */ + hd = &p->blk_array[i]; + hd->blk_dma = dma_start; + hd->blk = va; + hd->blk_tag = TAG_FREE; + hd->blk_num = 0; + + dma_num++; + } + + /* + * if dma_num <= (1 / 1.15) * user's block_num, we think the pool + * is created with failure. + */ + if (dma_num <= NUM_TIMES(p->setup.block_num)) { + WD_ERR("dma_num = %u, not enough.\n", dma_num); + goto init_blk_error; + } + + p->free_blk_num = dma_num; + p->setup.block_num = dma_num; + + return WD_SUCCESS; + +init_blk_error: + /* Clean up the allocated resources. */ + for (j = 0; j < i; j++) { + /* Release the previously allocated blocks. */ + fhd = &p->blk_array[j]; + wd_iova_unmap(cinfo, fhd->blk, fhd->blk_dma, block_size); + } + free(p->free_bitmap); + +bitmap_error: + free(p->blk_array); + + return -WD_ENOMEM; +} + +static int usr_pool_init(struct wd_blkpool *p) +{ + struct wd_mempool_setup *sp = &p->setup; + __u32 blk_size = sp->block_size; + struct wd_blk_hd *hd = NULL; + __u32 i; + + p->act_start = (void *)ALIGN((uintptr_t)p->usr_mem_start, + sp->align_size); + for (i = 0; i < sp->block_num; i++) { + hd = (void *)((uintptr_t)p->act_start + (p->act_hd_sz + p->act_blk_sz) * i); + hd->blk = (void *)((uintptr_t)hd + p->act_hd_sz); + hd->blk_dma = sp->ops.iova_map(sp->ops.usr, hd->blk, blk_size); + if (!hd->blk_dma) { + WD_ERR("failed to map usr blk.\n"); + return -WD_ENOMEM; + } + hd->blk_tag = TAG_FREE; + } + + p->free_blk_num = sp->block_num; + + return WD_SUCCESS; +} + +static int wd_parse_dev_id(char *dev_name) +{ + char *last_dash = NULL; + char *endptr; + int dev_id; + + if (!dev_name) + return -WD_EINVAL; + + /* Find the last '-' in the string. 
*/ + last_dash = strrchr(dev_name, '-'); + if (!last_dash || *(last_dash + 1) == '\0') + return -WD_EINVAL; + + /* Parse the following number */ + dev_id = strtol(last_dash + 1, &endptr, DECIMAL_NUMBER); + /* Check whether it is truly all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + +static int wd_mempool_init(handle_t h_ctx, struct wd_blkpool *pool, + struct wd_mempool_setup *setup) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct ctx_info *cinfo = pool->cinfo; + void *addr = NULL; + int ret; + + /* Use user's memory, and its ops alloc function */ + if (setup->ops.alloc && setup->ops.free && setup->ops.iova_map) { + addr = setup->ops.alloc(setup->ops.usr, pool->act_mem_sz); + if (!addr) { + WD_ERR("failed to allocate memory in user pool.\n"); + return -WD_EINVAL; + } + + pool->usr_mem_start = addr; + if (usr_pool_init(pool)) { + WD_ERR("failed to initialize user pool.\n"); + setup->ops.free(setup->ops.usr, addr); + return -WD_EINVAL; + } + } else { + /* Use wd to reserve memory */ + addr = wd_map_reserve_mem(pool, pool->act_mem_sz); + if (!addr) { + WD_ERR("wd pool failed to reserve memory.\n"); + return -WD_ENOMEM; + } + + pool->usr_mem_start = addr; + if (wd_pool_init(pool)) { + WD_ERR("failed to initialize wd pool.\n"); + goto err_out; + } + setup->block_num = pool->setup.block_num; + } + + ret = wd_parse_dev_id(ctx->dev_path); + if (ret < 0) { + wd_pool_uninit(pool); + goto err_out; + } + pool->dev_id = ret; + + return WD_SUCCESS; + +err_out: + if (pool->cinfo) { + wd_free_slice(cinfo); + wd_unmap_reserve_mem(cinfo->ss_va, cinfo->ss_mm_size); + pool->cinfo = NULL; + } + return -WD_EINVAL; +} + +void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup) +{ + struct wd_blkpool *pool = NULL; + int ret; + + if (!setup || !h_ctx) { + WD_ERR("Input param is NULL!\n"); + return NULL; + } + + ret = wd_is_sva(h_ctx); + if (ret < 0) { + WD_ERR("failed to check device ctx!\n"); + return NULL; + } else if (ret == UACCE_DEV_SVA) { + WD_ERR("the device is SVA mode!\n"); + return NULL; + } + + pool = calloc(1, sizeof(*pool)); + if (!pool) { + WD_ERR("failed to malloc pool.\n"); + return NULL; + } + ret = pthread_spin_init(&pool->pool_lock, PTHREAD_PROCESS_PRIVATE); + if (ret) + goto err_pool_alloc; + + memcpy(&pool->setup, setup, sizeof(pool->setup)); + + ret = wd_pool_pre_layout(h_ctx, pool, setup); + if (ret) + goto err_pool_layout; + + ret = wd_mempool_init(h_ctx, pool, setup); + if (ret) + goto err_pool_init; + + return pool; + +err_pool_init: + if (pool->cinfo) { + free(pool->cinfo); + pool->cinfo = NULL; + } +err_pool_layout: + pthread_spin_destroy(&pool->pool_lock); +err_pool_alloc: + free(pool); + + return NULL; +} + +void wd_mempool_free(handle_t h_ctx, void *pool) +{ + struct wd_mempool_setup *setup; + struct wd_blkpool *p = pool; + + if (!p || !h_ctx) { + WD_ERR("pool destroy err, pool or ctx is NULL.\n"); + return; + } + + setup = &p->setup; + if (p->free_blk_num != setup->block_num) { + WD_ERR("Can not destroy blk pool, as it's in use.\n"); + return; + } + + if (setup->ops.free) + setup->ops.free(setup->ops.usr, p->usr_mem_start); + + if (p->cinfo) { + /* Free block array memory */ + if (p->blk_array) + free(p->blk_array); + + if (p->free_bitmap) + free(p->free_bitmap); + + wd_free_slice(p->cinfo); + wd_unmap_reserve_mem(p->cinfo->ss_va, p->cinfo->ss_mm_size); + free(p->cinfo); + p->cinfo = NULL; + } + + pthread_spin_destroy(&p->pool_lock); + free(p); +} + +void wd_mem_free(void *pool, void *buf) +{ + struct 
wd_blkpool *p = pool; + struct wd_blk_hd *current_hd; + struct wd_blk_hd *hd; + unsigned int current_idx; + unsigned int blk_idx; + unsigned long offset; + unsigned int i, num; + unsigned long sz; + + if (unlikely(!p || !buf)) { + WD_ERR("free blk parameters err!\n"); + return; + } + + sz = p->act_hd_sz + p->act_blk_sz; + if (!sz) { + WD_ERR("memory pool blk size is zero!\n"); + return; + } + + if ((uintptr_t)buf < (uintptr_t)p->act_start) { + WD_ERR("free block addr is error.\n"); + return; + } + + /* Calculate the block index. */ + offset = (unsigned long)((uintptr_t)buf - (uintptr_t)p->act_start); + blk_idx = offset / sz; + + /* Check if the index is valid. */ + if (blk_idx >= p->total_blocks) { + WD_ERR("Invalid block index<%u>.\n", blk_idx); + return; + } + + /* Get the block header. */ + hd = &p->blk_array[blk_idx]; + num = hd->blk_num; + + pthread_spin_lock(&p->pool_lock); + /* Release all related blocks. */ + for (i = 0; i < num; i++) { + // Recalculate the index (since it is contiguous). + current_idx = blk_idx + i; + current_hd = &p->blk_array[current_idx]; + current_hd->blk_tag = TAG_FREE; + current_hd->blk_num = 0; + bitmap_clear_bit(p->free_bitmap, current_idx); + } + p->free_blk_num += num; + pthread_spin_unlock(&p->pool_lock); +} + +static int wd_find_contiguous_blocks(struct wd_blkpool *p, + unsigned int required_blocks, + unsigned int *start_block) +{ +#define MAX_SKIP_ATTEMPTS 10 + unsigned int consecutive_count = 0; + unsigned int skip_attempts = 0; + struct wd_blk_hd *hd, *tl; + unsigned int i; + + if (required_blocks == 0 || required_blocks > p->total_blocks) + return -WD_EINVAL; + + for (i = 0; i < p->total_blocks; i++) { + if (!bitmap_test_bit(p->free_bitmap, i)) { + consecutive_count = 0; + continue; + } + + if (consecutive_count == 0) + *start_block = i; + consecutive_count++; + + if (consecutive_count < required_blocks) + continue; + + /* Check DMA contiguity only if more than one block is needed */ + if (required_blocks > 1) { + hd = &p->blk_array[*start_block]; + tl = &p->blk_array[*start_block + required_blocks - 1]; + + if (((uintptr_t)tl->blk_dma - (uintptr_t)hd->blk_dma) != + ((uintptr_t)tl->blk - (uintptr_t)hd->blk)) { + /* Not contiguous, skip this start and try again */ + if (++skip_attempts > MAX_SKIP_ATTEMPTS) + return -WD_ENOMEM; + + i = *start_block; // will be incremented by loop + consecutive_count = 0; + continue; + } + } + + /* Found and DMA is contiguous */ + return WD_SUCCESS; + } + + return -WD_ENOMEM; +} + +void *wd_mem_alloc(void *pool, size_t size) +{ + unsigned int required_blocks; + unsigned int start_block = 0; + struct wd_blk_hd *hd = NULL; + struct wd_blkpool *p = pool; + unsigned int j; + int ret; + + if (unlikely(!p || !size)) { + WD_ERR("blk alloc pool is null!\n"); + return NULL; + } + + if (!p->act_blk_sz) { + WD_ERR("blk pool is error!\n"); + return NULL; + } + + /* Calculate the number of blocks required. */ + required_blocks = (size + p->act_blk_sz - 1) / p->act_blk_sz; + if (required_blocks > p->free_blk_num) { + p->alloc_failures++; + WD_ERR("Not enough free blocks.\n"); + return NULL; + } + + pthread_spin_lock(&p->pool_lock); + /* Find contiguous free blocks. 
*/ + ret = wd_find_contiguous_blocks(p, required_blocks, &start_block); + if (ret != 0) { + p->alloc_failures++; + pthread_spin_unlock(&p->pool_lock); + WD_ERR("Failed to find contiguous blocks.\n"); + return NULL; + } + + /* Mark all required blocks as used */ + for (j = start_block; j < start_block + required_blocks; j++) { + p->blk_array[j].blk_tag = TAG_USED; + bitmap_set_bit(p->free_bitmap, j); + } + + p->free_blk_num -= required_blocks; + hd = &p->blk_array[start_block]; + hd->blk_num = required_blocks; + pthread_spin_unlock(&p->pool_lock); + + return hd->blk; +} + +void *wd_mem_map(void *pool, void *buf, size_t sz) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + unsigned long offset; + unsigned long blk_sz; + unsigned long blk_idx; + + if (unlikely(!pool || !buf)) { + WD_ERR("blk map err, pool or buf is NULL!\n"); + return NULL; + } + + if (!sz || (uintptr_t)buf < (uintptr_t)p->act_start) { + WD_ERR("map buf addr is invalid.\n"); + return NULL; + } + /* Calculate the block index. */ + offset = (unsigned long)((uintptr_t)buf - (uintptr_t)p->act_start); + blk_sz = p->act_hd_sz + p->act_blk_sz; + blk_idx = offset / blk_sz; + + /* Check if the index is valid. */ + if (blk_idx >= p->total_blocks) { + WD_ERR("Invalid block index<%lu> in map.\n", blk_idx); + return NULL; + } + + hd = &p->blk_array[blk_idx]; + if (unlikely(hd->blk_tag != TAG_USED || + (uintptr_t)buf < (uintptr_t)hd->blk)) { + WD_ERR("dma map fail!\n"); + return NULL; + } + + return (void *)((uintptr_t)hd->blk_dma + ((uintptr_t)buf - + (uintptr_t)hd->blk)); +} + +void wd_mem_unmap(void *pool, void *buf_dma, void *buf, size_t sz) +{ + /* do nothing at no-iommu mode */ +} + +int wd_get_free_num(void *pool, __u32 *free_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !free_num) { + WD_ERR("get_free_blk_num err, parameter err!\n"); + return -WD_EINVAL; + } + + *free_num = __atomic_load_n(&p->free_blk_num, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +int wd_get_fail_num(void *pool, __u32 *fail_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !fail_num) { + WD_ERR("get_blk_alloc_failure err, pool is NULL!\n"); + return -WD_EINVAL; + } + + *fail_num = __atomic_load_n(&p->alloc_failures, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +__u32 wd_get_bufsize(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("failed to get bufsize, pool is NULL!\n"); + return 0; + } + + return p->act_blk_sz; +} + +__u32 wd_get_dev_id(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("failed to get dev id!\n"); + return 0; + } + + return p->dev_id; +} + diff --git a/wd_util.c b/wd_util.c index e8a2934..d0d83eb 100644 --- a/wd_util.c +++ b/wd_util.c @@ -13,6 +13,8 @@ #include <ctype.h> #include "wd_sched.h" #include "wd_util.h" +#include "wd_alg.h" +#include "wd_bmm.h" #define WD_ASYNC_DEF_POLL_NUM 1 #define WD_ASYNC_DEF_QUEUE_DEPTH 1024 @@ -100,11 +102,6 @@ struct acc_alg_item { const char *algtype; }; -struct wd_ce_ctx { - char *drv_name; - void *priv; -}; - static struct acc_alg_item alg_options[] = { {"zlib", "zlib"}, {"gzip", "gzip"}, @@ -172,6 +169,93 @@ static struct acc_alg_item alg_options[] = { {"", ""} }; +static void *wd_internal_alloc(void *usr, size_t size) +{ + if (size != 0) + return malloc(size); + else + return NULL; +} + +static void wd_internal_free(void *usr, void *va) +{ + if (va != NULL) + free(va); +} + +static __u32 wd_mem_bufsize(void *usr) +{ + /* Malloc memory min size is 1 Byte */ + return 1; +} + +int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type) +{ + int ret; + +
ret = wd_is_sva(h_ctx); + if (ret == UACCE_DEV_SVA || ret == -WD_HW_EACCESS) { + /* + * In software queue scenario, all memory is handled as virtual memory + * and processed in the same way as SVA mode + */ + mm_ops->sva_mode = true; + } else if (!ret) { + mm_ops->sva_mode = false; + } else { + WD_ERR("failed to check ctx!\n"); + return ret; + } + + /* + * Under SVA mode, there is no need to consider the memory type; + * directly proceed with virtual memory handling + */ + if (mm_ops->sva_mode) { + mm_ops->alloc = (void *)wd_internal_alloc; + mm_ops->free = (void *)wd_internal_free; + mm_ops->iova_map = NULL; + mm_ops->iova_unmap = NULL; + mm_ops->get_bufsize = (void *)wd_mem_bufsize; + mm_ops->usr = NULL; + return 0; + } + + switch (mem_type) { + case UADK_MEM_AUTO: + /* + * The memory pool needs to be allocated according to + * the block size when it is first executed in the UADK + */ + mm_ops->usr = NULL; + WD_ERR("automatic under No-SVA mode is not supported!\n"); + return -WD_EINVAL; + case UADK_MEM_USER: + if (!mm_ops->alloc || !mm_ops->free || !mm_ops->iova_map || + !mm_ops->iova_unmap || !mm_ops->usr) { // The user create a memory pool + WD_ERR("failed to check memory ops, some ops function is NULL!\n"); + return -WD_EINVAL; + } + break; + case UADK_MEM_PROXY: + if (!mm_ops->usr) { + WD_ERR("failed to check memory pool!\n"); + return -WD_EINVAL; + } + mm_ops->alloc = (void *)wd_mem_alloc; + mm_ops->free = (void *)wd_mem_free; + mm_ops->iova_map = (void *)wd_mem_map; + mm_ops->iova_unmap = (void *)wd_mem_unmap; + mm_ops->get_bufsize = (void *)wd_get_bufsize; + break; + default: + WD_ERR("failed to check memory type!\n"); + return -WD_EINVAL; + } + + return 0; +} + static void clone_ctx_to_internal(struct wd_ctx *ctx, struct wd_ctx_internal *ctx_in) { @@ -257,6 +341,12 @@ int wd_init_ctx_config(struct wd_ctx_config_internal *in, WD_ERR("failed to init ctxs lock!\n"); goto err_out; } + + ret = wd_insert_ctx_list(cfg->ctxs[i].ctx, in->alg_name); + if (ret) { + WD_ERR("failed to add ctx to mem list!\n"); + goto err_out; + } } in->ctxs = ctxs; @@ -318,6 +408,7 @@ void wd_clear_ctx_config(struct wd_ctx_config_internal *in) in->ctxs = NULL; } + wd_remove_ctx_list(); wd_shm_delete(in); } @@ -2485,7 +2576,7 @@ static int wd_init_ctx_set(struct wd_init_attrs *attrs, struct uacce_dev_list *l /* If the ctx set number is 0, the initialization is skipped. */ if (!ctx_set_num) - return 0; + return -WD_ENOPROC; dev = wd_find_dev_by_numa(list, numa_id); if (WD_IS_ERR(dev)) @@ -2573,7 +2664,9 @@ static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bm for (j = 0; j < op_type_num; j++) { ctx_nums = ctx_params->ctx_set_num[j]; ret = wd_init_ctx_set(attrs, list, idx, i, j); - if (ret) + if (ret == -WD_ENOPROC) + continue; + else if (ret) goto free_ctxs; ret = wd_instance_sched_set(attrs->sched, ctx_nums, idx, i, j); if (ret) -- 2.33.0
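Usage sketch (illustrative, not part of the patch): a minimal No-SVA round-trip over the wd_bmm.h API added above. wd_get_accel_dev() and wd_request_ctx()/wd_release_ctx() are the pre-existing UADK helpers; the "cipher" algorithm name, the block geometry, and the error handling are assumptions for the example. Leaving setup.ops zeroed selects the UADK reserved-memory path; on an SVA-capable ctx, wd_mempool_alloc() refuses to create the pool.

#include <stdlib.h>
#include "wd.h"
#include "wd_bmm.h"

static int nosva_pool_demo(void)
{
	/* 64 blocks of 4KB each, block start addresses aligned to 64B */
	struct wd_mempool_setup setup = {
		.block_size = 4096,
		.block_num = 64,
		.align_size = 64,
	};
	struct uacce_dev *dev;
	void *pool, *buf, *dma;
	handle_t h_ctx;
	int ret = -WD_EINVAL;

	dev = wd_get_accel_dev("cipher");
	if (!dev)
		return -WD_ENODEV;

	h_ctx = wd_request_ctx(dev);
	free(dev);
	if (!h_ctx)
		return -WD_EINVAL;

	/* Reserves kernel memory via the UACCE_QFRT_SS region and slices it */
	pool = wd_mempool_alloc(h_ctx, &setup);
	if (!pool)
		goto out_ctx;

	buf = wd_mem_alloc(pool, setup.block_size);
	if (!buf)
		goto out_pool;

	/* Translate the block VA into a DMA address for the device */
	dma = wd_mem_map(pool, buf, setup.block_size);
	if (dma) {
		/* ... fill buf and pass buf/dma down the request path ... */
		wd_mem_unmap(pool, dma, buf, setup.block_size);
		ret = 0;
	}

	wd_mem_free(pool, buf);
out_pool:
	wd_mempool_free(h_ctx, pool);
out_ctx:
	wd_release_ctx(h_ctx);
	return ret;
}

Since wd_mem_alloc() hands out runs of whole blocks, a request larger than block_size only succeeds when enough adjacent free blocks are also DMA-contiguous (see wd_find_contiguous_blocks() above).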
From: Longfang Liu <liulongfang@huawei.com> In the previous UADK v2 framework, the entire UADK was bound to SVA mode, and device form and queue attributes were checked against SVA mode requirements. After adding the new No-SVA mode memory pool, these bindings need to be removed so that UADK can still execute business operations normally on an OS running in No-SVA mode. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- include/wd_alg.h | 25 ++++--------------- libwd.map | 4 +++ wd.c | 4 --- wd_alg.c | 64 +++++++++++++++++------------------------------- wd_util.c | 5 ---- 5 files changed, 31 insertions(+), 71 deletions(-) diff --git a/include/wd_alg.h b/include/wd_alg.h index 2fc350a..5ff73ca 100644 --- a/include/wd_alg.h +++ b/include/wd_alg.h @@ -115,26 +115,6 @@ struct wd_alg_driver { int (*get_extend_ops)(void *ops); }; -inline int wd_alg_driver_init(struct wd_alg_driver *drv, void *conf) -{ - return drv->init(drv, conf); -} - -inline void wd_alg_driver_exit(struct wd_alg_driver *drv) -{ - drv->exit(drv); -} - -inline int wd_alg_driver_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) -{ - return drv->send(drv, ctx, msg); -} - -inline int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) -{ - return drv->recv(drv, ctx, msg); -} - /* * wd_alg_driver_register() - Register a device driver. * @wd_alg_driver: a device driver that supports an algorithm. @@ -194,6 +174,11 @@ bool wd_drv_alg_support(const char *alg_name, void wd_enable_drv(struct wd_alg_driver *drv); void wd_disable_drv(struct wd_alg_driver *drv); +int wd_alg_driver_init(struct wd_alg_driver *drv, void *conf); +void wd_alg_driver_exit(struct wd_alg_driver *drv); +int wd_alg_driver_send(struct wd_alg_driver *drv, handle_t ctx, void *msg); +int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg); + struct wd_alg_list *wd_get_alg_head(void); #ifdef WD_STATIC_DRV diff --git a/libwd.map b/libwd.map index b1b90b3..90eb5c5 100644 --- a/libwd.map +++ b/libwd.map @@ -49,6 +49,10 @@ global: wd_enable_drv; wd_disable_drv; wd_get_alg_head; + wd_alg_driver_init; + wd_alg_driver_exit; + wd_alg_driver_send; + wd_alg_driver_recv; wd_find_ctx; wd_get_dev_id; diff --git a/wd.c b/wd.c index 3e867b6..657fbae 100644 --- a/wd.c +++ b/wd.c @@ -238,10 +238,6 @@ static int get_dev_info(struct uacce_dev *dev) ret = get_int_attr(dev, "flags", &dev->flags); if (ret < 0) return ret; - else if (!((unsigned int)dev->flags & UACCE_DEV_SVA)) { - WD_ERR("skip none sva uacce device!\n"); - return -WD_ENODEV; - } ret = get_int_attr(dev, "region_mmio_size", &value); if (ret < 0) diff --git a/wd_alg.c b/wd_alg.c index 08f0e2e..9c7c0fd 100644 --- a/wd_alg.c +++ b/wd_alg.c @@ -24,46 +24,6 @@ static struct wd_alg_list *alg_list_tail = &alg_list_head; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -static bool wd_check_dev_sva(const char *dev_name) -{ - char dev_path[PATH_MAX] = {'\0'}; - char buf[DEV_SVA_SIZE] = {'\0'}; - unsigned int val; - ssize_t ret; - int fd; - - ret = snprintf(dev_path, PATH_STR_SIZE, "%s/%s/%s", SYS_CLASS_DIR, - dev_name, SVA_FILE_NAME); - if (ret < 0) { - WD_ERR("failed to snprintf, device name: %s!\n", dev_name); - return false; - } - - /** - * The opened file is the specified device driver file. - * no need for realpath processing.
- */ - fd = open(dev_path, O_RDONLY, 0); - if (fd < 0) { - WD_ERR("failed to open %s(%d)!\n", dev_path, -errno); - return false; - } - - ret = read(fd, buf, DEV_SVA_SIZE - 1); - if (ret <= 0) { - WD_ERR("failed to read anything at %s!\n", dev_path); - close(fd); - return false; - } - close(fd); - - val = strtol(buf, NULL, STR_DECIMAL); - if (val & UACCE_DEV_SVA) - return true; - - return false; -} - static bool wd_check_accel_dev(const char *dev_name) { struct dirent *dev_dir; @@ -80,8 +40,7 @@ static bool wd_check_accel_dev(const char *dev_name) !strncmp(dev_dir->d_name, "..", LINUX_PRTDIR_SIZE)) continue; - if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name)) && - wd_check_dev_sva(dev_dir->d_name)) { + if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name))) { closedir(wd_class); return true; } @@ -413,3 +372,24 @@ void wd_release_drv(struct wd_alg_driver *drv) select_node->refcnt--; pthread_mutex_unlock(&mutex); } + +int wd_alg_driver_init(struct wd_alg_driver *drv, void *conf) +{ + return drv->init(drv, conf); +} + +void wd_alg_driver_exit(struct wd_alg_driver *drv) +{ + drv->exit(drv); +} + +int wd_alg_driver_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) +{ + return drv->send(drv, ctx, msg); +} + +int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) +{ + return drv->recv(drv, ctx, msg); +} + diff --git a/wd_util.c b/wd_util.c index d0d83eb..beb4131 100644 --- a/wd_util.c +++ b/wd_util.c @@ -1984,11 +1984,6 @@ int wd_init_param_check(struct wd_ctx_config *config, struct wd_sched *sched) return -WD_EINVAL; } - if (!wd_is_sva(config->ctxs[0].ctx)) { - WD_ERR("invalid: the mode is non sva, please check system!\n"); - return -WD_EINVAL; - } - return 0; } -- 2.33.0
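Sketch of the exported wrappers in use (illustrative, not part of the patch): now that wd_alg_driver_init/send/recv/exit are real libwd symbols rather than header inlines, a caller outside the algorithm layer can drive a driver directly. wd_request_drv()/wd_release_drv() are the existing driver lookup helpers; the "zlib" name and the conf/msg contents are placeholders whose layout is driver-specific.

#include "wd.h"
#include "wd_alg.h"

static int drv_roundtrip(handle_t h_ctx, void *conf, void *msg)
{
	struct wd_alg_driver *drv;
	int ret;

	/* Look up a registered driver for the algorithm */
	drv = wd_request_drv("zlib", false);
	if (!drv)
		return -WD_ENODEV;

	ret = wd_alg_driver_init(drv, conf);
	if (ret)
		goto out_put;

	ret = wd_alg_driver_send(drv, h_ctx, msg);
	if (!ret) {
		/* Sync-style completion: poll until the device answers */
		do {
			ret = wd_alg_driver_recv(drv, h_ctx, msg);
		} while (ret == -WD_EAGAIN);
	}

	wd_alg_driver_exit(drv);
out_put:
	wd_release_drv(drv);
	return ret;
}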
From: Longfang Liu <liulongfang@huawei.com> In certain special scenarios (e.g., in No-SVA mode), the queues used by the scheduler must come from a specific device; queues from other devices cannot be used to perform business operations. Therefore, a specialized scheduler needs to be added to ensure that subsequent business operations are executed only on the designated device. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- include/wd_internal.h | 2 +- include/wd_sched.h | 3 + wd_sched.c | 195 ++++++++++++++++++++++++++++++++++++------ wd_util.c | 43 +++++++++- 4 files changed, 214 insertions(+), 29 deletions(-) diff --git a/include/wd_internal.h b/include/wd_internal.h index cd90ebf..769192d 100644 --- a/include/wd_internal.h +++ b/include/wd_internal.h @@ -13,7 +13,7 @@ #ifdef __cplusplus extern "C" { #endif - +#define NOSVA_DEVICE_MAX 16 #define DECIMAL_NUMBER 10 #define MAX_FD_NUM 65535 diff --git a/include/wd_sched.h b/include/wd_sched.h index be541c6..949396e 100644 --- a/include/wd_sched.h +++ b/include/wd_sched.h @@ -23,6 +23,8 @@ enum sched_policy_type { SCHED_POLICY_NONE, /* requests will need a fixed ctx */ SCHED_POLICY_SINGLE, + /* requests will be sent to ctxs and dev_id */ + SCHED_POLICY_DEV, SCHED_POLICY_BUTT, }; @@ -32,6 +34,7 @@ struct sched_params { __u8 mode; __u32 begin; __u32 end; + __u32 dev_id; }; typedef int (*user_poll_func)(__u32 pos, __u32 expect, __u32 *count); diff --git a/wd_sched.c b/wd_sched.c index 204ed23..46a8c00 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -33,6 +33,7 @@ struct sched_key { __u8 mode; __u32 sync_ctxid; __u32 async_ctxid; + __u32 dev_id; }; /* @@ -76,7 +77,8 @@ struct wd_sched_info { struct wd_sched_ctx { __u32 policy; __u32 type_num; - __u16 numa_num; + __u16 numa_num; + __u16 dev_num; user_poll_func poll_func; int numa_map[NUMA_NUM_NODES]; struct wd_sched_info sched_info[0]; }; @@ -309,7 +311,7 @@ static int session_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 * struct wd_sched_info *sched_info; __u32 loop_time = 0; __u32 last_count = 0; - __u16 i; + __u16 i, region_num; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -323,14 +325,18 @@ } sched_info = sched_ctx->sched_info; + if (sched_ctx->policy == SCHED_POLICY_DEV) + region_num = sched_ctx->dev_num; + else + region_num = sched_ctx->numa_num; /* - * Try different numa's ctx if we can't receive any + * Try different region's ctx if we can't receive any * package last time, it is more efficient. In most * bad situation, poll ends after MAX_POLL_TIMES loop. */ while (++loop_time < MAX_POLL_TIMES) { - for (i = 0; i < sched_ctx->numa_num;) { + for (i = 0; i < region_num;) { /* If current numa is not valid, find next.
*/ if (!sched_info[i].valid) { i++; continue; } @@ -445,6 +451,115 @@ static int sched_single_poll_policy(handle_t h_sched_ctx, return 0; } +static bool sched_dev_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +{ + if (key->dev_id >= sched_ctx->dev_num || key->mode >= SCHED_MODE_BUTT || + key->type >= sched_ctx->type_num) { + WD_ERR("invalid: sched key's dev: %u, mode: %u, type: %u!\n", + key->dev_id, key->mode, key->type); + return false; + } + + return true; +} + +/* + * sched_dev_get_region - Get ctx region from ctx_map by the wd comp arg + */ +static struct sched_ctx_region *sched_dev_get_region(struct wd_sched_ctx *sched_ctx, + const struct sched_key *key) +{ + struct wd_sched_info *sched_info; + + sched_info = sched_ctx->sched_info; + if (key->dev_id < sched_ctx->dev_num && + sched_info[key->dev_id].ctx_region[key->mode][key->type].valid) + return &sched_info[key->dev_id].ctx_region[key->mode][key->type]; + + /* + * If the scheduling domain of dev_id does not exist, + * tasks cannot be executed using queues from other devices; + * otherwise, an SMMU error will occur. + */ + return NULL; +} + +/* + * session_dev_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. + * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. + * @sched_key: The key of schedule region. + * @sched_mode: The sched async/sync mode. + * + * The user must init the schedule info through wd_sched_rr_instance + */ +static __u32 session_dev_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *key, + const int sched_mode) +{ + struct sched_ctx_region *region = NULL; + bool ret; + + key->mode = sched_mode; + ret = sched_dev_key_valid(sched_ctx, key); + if (!ret) + return INVALID_POS; + + region = sched_dev_get_region(sched_ctx, key); + if (!region) + return INVALID_POS; + + return sched_get_next_pos_rr(region, NULL); +} + +static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + int cpu = sched_getcpu(); + int node = numa_node_of_cpu(cpu); + struct sched_key *skey; + + if (node < 0) { + WD_ERR("invalid: failed to get numa node!\n"); + return (handle_t)(-WD_EINVAL); + } + + if (!sched_ctx) { + WD_ERR("invalid: sched ctx is NULL!\n"); + return (handle_t)(-WD_EINVAL); + } + + skey = malloc(sizeof(struct sched_key)); + if (!skey) { + WD_ERR("failed to alloc memory for session sched key!\n"); + return (handle_t)(-WD_ENOMEM); + } + + if (!param) { + WD_DEBUG("no-sva session didn't set scheduler parameters!\n"); + return (handle_t)(-WD_EINVAL); + } + + skey->type = param->type; + skey->dev_id = param->dev_id; + if (skey->dev_id > NOSVA_DEVICE_MAX) { + WD_ERR("failed to get valid sched device region!\n"); + goto out; + } + + skey->sync_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); + skey->async_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); + if (skey->sync_ctxid == INVALID_POS && skey->async_ctxid == INVALID_POS) { + WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); + goto out; + } + + return (handle_t)skey; + +out: + free(skey); + return (handle_t)(-WD_EINVAL); +} + static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { { .name = "RR scheduler", @@ -464,6 +579,12 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { .sched_init = sched_single_init, .pick_next_ctx = sched_single_pick_next_ctx, .poll_policy = sched_single_poll_policy, + }, {
+ .name = "Device RR scheduler", + .sched_policy = SCHED_POLICY_DEV, + .sched_init = session_dev_sched_init, + .pick_next_ctx = session_sched_pick_next_ctx, + .poll_policy = session_sched_poll_policy, } }; @@ -505,6 +626,7 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para struct wd_sched_info *sched_info = NULL; struct wd_sched_ctx *sched_ctx = NULL; __u8 type, mode; + __u32 dev_id; int numa_id; if (!sched || !sched->h_sched_ctx || !param) { @@ -518,16 +640,24 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para } numa_id = param->numa_id; + dev_id = param->dev_id; type = param->type; mode = param->mode; sched_ctx = (struct wd_sched_ctx *)sched->h_sched_ctx; - if (numa_id >= sched_ctx->numa_num || numa_id < 0) { + if (sched_ctx->numa_num > 0 && (numa_id >= sched_ctx->numa_num || + numa_id < 0)) { WD_ERR("invalid: sched_ctx's numa_id is %d, numa_num is %u!\n", numa_id, sched_ctx->numa_num); return -WD_EINVAL; } + if (sched_ctx->dev_num > 0 && dev_id >= sched_ctx->dev_num) { + WD_ERR("invalid: sched_ctx's dev id is %u, device num is %u!\n", + dev_id, sched_ctx->dev_num); + return -WD_EINVAL; + } + if (type >= sched_ctx->type_num) { WD_ERR("invalid: sched_ctx's type is %u, type_num is %u!\n", type, sched_ctx->type_num); @@ -540,24 +670,26 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - sched_info = sched_ctx->sched_info; + if (sched_ctx->policy == SCHED_POLICY_DEV) + sched_info = &sched_ctx->sched_info[dev_id]; + else + sched_info = &sched_ctx->sched_info[numa_id]; - if (!sched_info[numa_id].ctx_region[mode]) { + if (!sched_info->ctx_region[mode]) { WD_ERR("invalid: ctx_region is NULL, numa: %d, mode: %u!\n", numa_id, mode); return -WD_EINVAL; } - sched_info[numa_id].ctx_region[mode][type].begin = param->begin; - sched_info[numa_id].ctx_region[mode][type].end = param->end; - sched_info[numa_id].ctx_region[mode][type].last = param->begin; - sched_info[numa_id].ctx_region[mode][type].valid = true; - sched_info[numa_id].valid = true; + sched_info->ctx_region[mode][type].begin = param->begin; + sched_info->ctx_region[mode][type].end = param->end; + sched_info->ctx_region[mode][type].last = param->begin; + sched_info->ctx_region[mode][type].valid = true; + sched_info->valid = true; - wd_sched_map_cpus_to_dev(sched_ctx); - - pthread_mutex_init(&sched_info[numa_id].ctx_region[mode][type].lock, - NULL); + if (sched_ctx->policy != SCHED_POLICY_DEV) + wd_sched_map_cpus_to_dev(sched_ctx); + pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); return 0; } @@ -566,7 +698,7 @@ void wd_sched_rr_release(struct wd_sched *sched) { struct wd_sched_info *sched_info; struct wd_sched_ctx *sched_ctx; - int i, j; + int i, j, region_num; if (!sched) return; @@ -575,11 +707,17 @@ void wd_sched_rr_release(struct wd_sched *sched) if (!sched_ctx) goto ctx_out; + /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ + if (sched_ctx->policy == SCHED_POLICY_DEV) + region_num = sched_ctx->dev_num; + else + region_num = sched_ctx->numa_num; + sched_info = sched_ctx->sched_info; if (!sched_info) goto info_out; - for (i = 0; i < sched_ctx->numa_num; i++) { + for (i = 0; i < region_num; i++) { for (j = 0; j < SCHED_MODE_BUTT; j++) { if (sched_info[i].ctx_region[j]) { free(sched_info[i].ctx_region[j]); @@ -620,10 +758,7 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, struct wd_sched_info *sched_info; struct wd_sched_ctx *sched_ctx; struct wd_sched *sched; - int i, 
j; - - if (numa_num_check(numa_num)) - return NULL; + int i, j, region_num; if (sched_type >= SCHED_POLICY_BUTT || !type_num) { WD_ERR("invalid: sched_type is %u or type_num is %u!\n", @@ -643,7 +778,19 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, WD_ERR("failed to alloc memory for sched_ctx!\n"); goto err_out; } - sched_ctx->numa_num = numa_num; + + /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ + if (sched_type == SCHED_POLICY_DEV) { + sched_ctx->numa_num = 0; + sched_ctx->dev_num = numa_num; + region_num = sched_ctx->dev_num; + } else { + sched_ctx->numa_num = numa_num; + sched_ctx->dev_num = 0; + region_num = sched_ctx->numa_num; + if (numa_num_check(sched_ctx->numa_num)) + goto err_out; + } sched->h_sched_ctx = (handle_t)sched_ctx; if (sched_type == SCHED_POLICY_NONE || @@ -651,7 +798,7 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, goto simple_ok; sched_info = sched_ctx->sched_info; - for (i = 0; i < numa_num; i++) { + for (i = 0; i < region_num; i++) { for (j = 0; j < SCHED_MODE_BUTT; j++) { sched_info[i].ctx_region[j] = calloc(1, sizeof(struct sched_ctx_region) * type_num); diff --git a/wd_util.c b/wd_util.c index beb4131..b96e125 100644 --- a/wd_util.c +++ b/wd_util.c @@ -256,6 +256,31 @@ int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type) return 0; } +static int wd_parse_dev_id(handle_t h_ctx) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + char *dev_path = ctx->dev_path; + char *last_str = NULL; + char *endptr; + int dev_id; + + if (!dev_path) + return -WD_EINVAL; + + /* Find the last '-' in the string. */ + last_str = strrchr(dev_path, '-'); + if (!last_str || *(last_str + 1) == '\0') + return -WD_EINVAL; + + /* Parse the following number */ + dev_id = strtol(last_str + 1, &endptr, DECIMAL_NUMBER); + /* Check whether it is truly all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + static void clone_ctx_to_internal(struct wd_ctx *ctx, struct wd_ctx_internal *ctx_in) { @@ -2619,15 +2644,21 @@ static void wd_release_ctx_set(struct wd_ctx_config *ctx_config) } } -static int wd_instance_sched_set(struct wd_sched *sched, struct wd_ctx_nums ctx_nums, +static int wd_instance_sched_set(struct wd_init_attrs *attrs, struct wd_ctx_nums ctx_nums, int idx, int numa_id, int op_type) { + struct wd_sched *sched = attrs->sched; struct sched_params sparams; - int i, end, ret = 0; + int i, end, dev_id, ret = 0; + + dev_id = wd_parse_dev_id(attrs->ctx_config->ctxs[idx].ctx); + if (dev_id < 0) + return -WD_EINVAL; for (i = 0; i < CTX_MODE_MAX; i++) { sparams.numa_id = numa_id; sparams.type = op_type; + sparams.dev_id = dev_id; sparams.mode = i; sparams.begin = idx + ctx_nums.sync_ctx_num * i; end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i; @@ -2663,7 +2694,7 @@ static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bm continue; else if (ret) goto free_ctxs; - ret = wd_instance_sched_set(attrs->sched, ctx_nums, idx, i, j); + ret = wd_instance_sched_set(attrs, ctx_nums, idx, i, j); if (ret) goto free_ctxs; idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num); @@ -2931,7 +2962,11 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) } attrs->ctx_config = ctx_config; - alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, + if (sched_type == SCHED_POLICY_DEV) + alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, + NOSVA_DEVICE_MAX, alg_poll_func); + else + alg_sched = 
wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, numa_max_node() + 1, alg_poll_func); if (!alg_sched) { WD_ERR("fail to instance scheduler\n"); -- 2.33.0
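The wd_parse_dev_id() helper above recovers the device ID from the ctx's character-device path (for example "hisi_sec2-4"), taking the digits after the last '-'. Below is a minimal standalone sketch of that parsing rule; the example paths and the main() harness are illustrative, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse the trailing "-<id>" of a uacce device name such as "hisi_sec2-4".
 * Returns the ID, or -1 on malformed input (mirroring -WD_EINVAL above).
 */
static int parse_dev_id(const char *dev_path)
{
        const char *last_dash;
        char *endptr;
        long dev_id;

        if (!dev_path)
                return -1;

        /* Find the last '-' and require digits after it. */
        last_dash = strrchr(dev_path, '-');
        if (!last_dash || last_dash[1] == '\0')
                return -1;

        dev_id = strtol(last_dash + 1, &endptr, 10);
        if (*endptr != '\0' || dev_id < 0)
                return -1;

        return (int)dev_id;
}

int main(void)
{
        printf("%d\n", parse_dev_id("hisi_sec2-4"));  /* prints 4 */
        printf("%d\n", parse_dev_id("hisi_sec2-"));   /* prints -1 */
        return 0;
}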
From: Longfang Liu <liulongfang@huawei.com> In the uadk device scheduling algorithm, when the device ID numbering scheme differs from the device count numbering scheme, all accelerator devices are uniformly numbered when passed through to user space. This can result in device IDs being significantly larger than the actual number of devices. Therefore, a mapping mechanism is required to handle the relationship between the number of devices within a process and their corresponding device IDs, thereby resolving this issue. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- include/wd_internal.h | 3 +- wd_sched.c | 149 +++++++++++++++++++++++++++++++----------- wd_util.c | 2 +- 3 files changed, 115 insertions(+), 39 deletions(-) diff --git a/include/wd_internal.h b/include/wd_internal.h index 769192d..d899555 100644 --- a/include/wd_internal.h +++ b/include/wd_internal.h @@ -13,7 +13,8 @@ #ifdef __cplusplus extern "C" { #endif -#define NOSVA_DEVICE_MAX 16 + +#define DEVICE_REGION_MAX 16 #define DECIMAL_NUMBER 10 #define MAX_FD_NUM 65535 diff --git a/wd_sched.c b/wd_sched.c index 46a8c00..ec1e7b6 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -65,6 +65,10 @@ struct wd_sched_info { bool valid; }; +struct dev_region_map { + __u32 dev_id; + __u32 region_id; +}; /* * wd_sched_ctx - define the context of the scheduler. * @policy: define the policy of the scheduler. @@ -81,6 +85,7 @@ struct wd_sched_ctx { __u16 dev_num; user_poll_func poll_func; int numa_map[NUMA_NUM_NODES]; + struct dev_region_map dev_id_map[DEVICE_REGION_MAX]; struct wd_sched_info sched_info[0]; }; @@ -453,13 +458,27 @@ static int sched_single_poll_policy(handle_t h_sched_ctx, static bool sched_dev_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) { - if (key->dev_id >= sched_ctx->dev_num || key->mode >= SCHED_MODE_BUTT || - key->type >= sched_ctx->type_num) { - WD_ERR("invalid: sched key's dev: %u, mode: %u, type: %u!\n", + bool found = false; + int i; + + if (key->mode >= SCHED_MODE_BUTT || key->type >= sched_ctx->type_num) { + WD_ERR("invalid: sched key's device id: %u, mode: %u, type: %u!\n", key->dev_id, key->mode, key->type); return false; } + for (i = 0; i < sched_ctx->dev_num; i++) { + if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { + found = true; + break; + } + } + + if (!found) { + WD_ERR("invalid: dev_id %u is not registered!\n", key->dev_id); + return false; + } + return true; } @@ -470,11 +489,16 @@ static struct sched_ctx_region *sched_dev_get_region(struct wd_sched_ctx *sched_ const struct sched_key *key) { struct wd_sched_info *sched_info; - - sched_info = sched_ctx->sched_info; - if (key->dev_id < sched_ctx->dev_num && - sched_info[key->dev_id].ctx_region[key->mode][key->type].valid) - return &sched_info[key->dev_id].ctx_region[key->mode][key->type]; + int i, region_id; + + for (i = 0; i < sched_ctx->dev_num; i++) { + if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { + region_id = sched_ctx->dev_id_map[i].region_id; + sched_info = &sched_ctx->sched_info[region_id]; + if (sched_info->ctx_region[key->mode][key->type].valid) + return &sched_info->ctx_region[key->mode][key->type]; + } + } /* * If the scheduling domain of dev_id does not exist, @@ -528,23 +552,19 @@ static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) return (handle_t)(-WD_EINVAL); } + if (!param) { + WD_DEBUG("no-sva session don't set scheduler parameters!\n"); + return (handle_t)(-WD_EINVAL); + } + skey = malloc(sizeof(struct sched_key)); if 
(!skey) { WD_ERR("failed to alloc memory for session sched key!\n"); return (handle_t)(-WD_ENOMEM); } - if (!param) { - WD_DEBUG("no-sva session don't set scheduler parameters!\n"); - return (handle_t)(-WD_EINVAL); - } - skey->type = param->type; skey->dev_id = param->dev_id; - if (skey->dev_id > NOSVA_DEVICE_MAX) { - WD_ERR("failed to get valid sched device region!\n"); - goto out; - } skey->sync_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); skey->async_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); @@ -621,12 +641,68 @@ static void wd_sched_map_cpus_to_dev(struct wd_sched_ctx *sched_ctx) } } +static int wd_instance_dev_region(struct wd_sched_ctx *sched_ctx, + struct sched_params *param) +{ + struct wd_sched_info *sched_info; + __u32 region_idx = INVALID_POS; + __u8 type, mode; + __u32 dev_id; + int i; + + dev_id = param->dev_id; + type = param->type; + mode = param->mode; + + /* Check whether dev_id has already been registered. */ + for (i = 0; i < sched_ctx->dev_num; i++) { + if (sched_ctx->dev_id_map[i].dev_id == dev_id) { + region_idx = sched_ctx->dev_id_map[i].region_id; + break; + } + } + + /* If not registered, allocate a new region. */ + if (region_idx == INVALID_POS) { + if (sched_ctx->dev_num >= DEVICE_REGION_MAX) { + WD_ERR("too many devices registered!\n"); + return -WD_EINVAL; + } + + region_idx = sched_ctx->dev_num; + sched_ctx->dev_id_map[region_idx].dev_id = dev_id; + sched_ctx->dev_id_map[region_idx].region_id = region_idx; + sched_ctx->dev_num++; + + sched_info = &sched_ctx->sched_info[region_idx]; + } else { + sched_info = &sched_ctx->sched_info[region_idx]; + } + + /* Check whether the mode and type have already been registered. */ + if (sched_info->ctx_region[mode][type].valid) { + WD_INFO("device %u mode %u type %u already registered\n", + dev_id, mode, type); + return WD_SUCCESS; + } + + /* Initialize the scheduling region for this mode and type */ + sched_info->ctx_region[mode][type].begin = param->begin; + sched_info->ctx_region[mode][type].end = param->end; + sched_info->ctx_region[mode][type].last = param->begin; + sched_info->ctx_region[mode][type].valid = true; + sched_info->valid = true; + + pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); + + return WD_SUCCESS; +} + int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *param) { struct wd_sched_info *sched_info = NULL; struct wd_sched_ctx *sched_ctx = NULL; __u8 type, mode; - __u32 dev_id; int numa_id; if (!sched || !sched->h_sched_ctx || !param) { @@ -640,7 +716,6 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para } numa_id = param->numa_id; - dev_id = param->dev_id; type = param->type; mode = param->mode; sched_ctx = (struct wd_sched_ctx *)sched->h_sched_ctx; @@ -652,12 +727,6 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - if (sched_ctx->dev_num > 0 && dev_id >= sched_ctx->dev_num) { - WD_ERR("invalid: sched_ctx's dev id is %u, device num is %u!\n", - dev_id, sched_ctx->dev_num); - return -WD_EINVAL; - } - if (type >= sched_ctx->type_num) { WD_ERR("invalid: sched_ctx's type is %u, type_num is %u!\n", type, sched_ctx->type_num); @@ -671,10 +740,9 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para } if (sched_ctx->policy == SCHED_POLICY_DEV) - sched_info = &sched_ctx->sched_info[dev_id]; - else - sched_info = &sched_ctx->sched_info[numa_id]; + return wd_instance_dev_region(sched_ctx, param); + 
sched_info = &sched_ctx->sched_info[numa_id]; if (!sched_info->ctx_region[mode]) { WD_ERR("invalid: ctx_region is NULL, numa: %d, mode: %u!\n", numa_id, mode); @@ -687,11 +755,10 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para sched_info->ctx_region[mode][type].valid = true; sched_info->valid = true; - if (sched_ctx->policy != SCHED_POLICY_DEV) - wd_sched_map_cpus_to_dev(sched_ctx); + wd_sched_map_cpus_to_dev(sched_ctx); pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); - return 0; + return WD_SUCCESS; } void wd_sched_rr_release(struct wd_sched *sched) @@ -709,7 +776,7 @@ void wd_sched_rr_release(struct wd_sched *sched) /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ if (sched_ctx->policy == SCHED_POLICY_DEV) - region_num = sched_ctx->dev_num; + region_num = DEVICE_REGION_MAX; else region_num = sched_ctx->numa_num; @@ -758,7 +825,8 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, struct wd_sched_info *sched_info; struct wd_sched_ctx *sched_ctx; struct wd_sched *sched; - int i, j, region_num; + int region_num; + int i, j; if (sched_type >= SCHED_POLICY_BUTT || !type_num) { WD_ERR("invalid: sched_type is %u or type_num is %u!\n", @@ -772,8 +840,13 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, return NULL; } + if (sched_type == SCHED_POLICY_DEV) + region_num = DEVICE_REGION_MAX; + else + region_num = numa_num; + sched_ctx = calloc(1, sizeof(struct wd_sched_ctx) + - sizeof(struct wd_sched_info) * numa_num); + sizeof(struct wd_sched_info) * region_num); if (!sched_ctx) { WD_ERR("failed to alloc memory for sched_ctx!\n"); goto err_out; @@ -782,12 +855,14 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ if (sched_type == SCHED_POLICY_DEV) { sched_ctx->numa_num = 0; - sched_ctx->dev_num = numa_num; - region_num = sched_ctx->dev_num; + sched_ctx->dev_num = 0; + for (i = 0; i < DEVICE_REGION_MAX; i++) { + sched_ctx->dev_id_map[i].dev_id = INVALID_POS; + sched_ctx->dev_id_map[i].region_id = INVALID_POS; + } } else { sched_ctx->numa_num = numa_num; sched_ctx->dev_num = 0; - region_num = sched_ctx->numa_num; if (numa_num_check(sched_ctx->numa_num)) goto err_out; } diff --git a/wd_util.c b/wd_util.c index b96e125..d0d62a1 100644 --- a/wd_util.c +++ b/wd_util.c @@ -2964,7 +2964,7 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) if (sched_type == SCHED_POLICY_DEV) alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, - NOSVA_DEVICE_MAX, alg_poll_func); + DEVICE_REGION_MAX, alg_poll_func); else alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, numa_max_node() + 1, alg_poll_func); -- 2.33.0
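With this change the kernel-visible device IDs may be sparse (e.g. 0, 7, 23) while the scheduler keeps at most DEVICE_REGION_MAX dense regions, and dev_id_map translates one into the other. A reduced sketch of the lookup-or-register step follows, using simplified types rather than the real struct wd_sched_ctx:

#define REGION_MAX 16   /* stands in for DEVICE_REGION_MAX */

struct id_map {
        unsigned int dev_id;
        unsigned int region_id;
};

struct mini_sched {
        unsigned int dev_num;           /* regions currently in use */
        struct id_map map[REGION_MAX];
};

/*
 * Return the dense region index for a (possibly sparse) dev_id,
 * registering it on first use; -1 once the table is full.
 */
static int region_for_dev(struct mini_sched *s, unsigned int dev_id)
{
        unsigned int i;

        for (i = 0; i < s->dev_num; i++)
                if (s->map[i].dev_id == dev_id)
                        return (int)s->map[i].region_id;

        if (s->dev_num >= REGION_MAX)
                return -1;

        s->map[s->dev_num].dev_id = dev_id;
        s->map[s->dev_num].region_id = s->dev_num;
        return (int)s->dev_num++;
}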
From: Longfang Liu <liulongfang@huawei.com> In the original uadk functionality, under SVA mode with huge page memory enabled, a set of automatic huge page memory pool allocation mechanisms is provided. Under No-SVA mode, a separate mechanism using kernel-reserved memory pools is available. Without changing the external interface, this patch merges the huge page memory pool functionality into the reserved memory pool system. This ensures that the unified reserved memory pool interface can be used consistently in both SVA and No-SVA modes. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- wd_bmm.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 142 insertions(+), 25 deletions(-) diff --git a/wd_bmm.c b/wd_bmm.c index 21c46ca..12c3bcf 100644 --- a/wd_bmm.c +++ b/wd_bmm.c @@ -78,6 +78,11 @@ struct wd_blkpool { unsigned long act_mem_sz; unsigned int dev_id; struct wd_mempool_setup setup; + + /* SVA mode for Hugepage */ + bool sva_mode; + handle_t hp_mempool; + handle_t hp_blkpool; }; struct mem_ctx_node { @@ -253,6 +258,123 @@ static bool bitmap_test_bit(const unsigned char *bitmap, unsigned int bit_index) return true; } +static int wd_parse_dev_id(char *dev_name) +{ + char *last_dash; + char *endptr; + int dev_id; + + if (!dev_name) + return -WD_EINVAL; + + /* Find the last '-' in the string. */ + last_dash = strrchr(dev_name, '-'); + if (!last_dash || *(last_dash + 1) == '\0') + return -WD_EINVAL; + + /* Parse the following number */ + dev_id = strtol(last_dash + 1, &endptr, DECIMAL_NUMBER); + /* Check whether it is truly all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + +/*----------------------------------SVA Hugepage memory pool---------------------------------*/ +static void *wd_hugepage_pool_create(handle_t h_ctx, struct wd_mempool_setup *setup) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct wd_blkpool *pool = NULL; + size_t total_size; + int numa_id, ret; + + pool = calloc(1, sizeof(*pool)); + if (!pool) { + WD_ERR("failed to malloc pool.\n"); + return NULL; + } + + pool->sva_mode = true; + memcpy(&pool->setup, setup, sizeof(pool->setup)); + + total_size = setup->block_size * setup->block_num; + numa_id = ctx->dev->numa_id; + + ret = wd_parse_dev_id(ctx->dev_path); + if (ret < 0) { + WD_ERR("failed to parse device id.\n"); + goto error; + } + pool->dev_id = ret; + + /* Create hugepage memory pool */ + pool->hp_mempool = wd_mempool_create(total_size, numa_id); + if (WD_IS_ERR(pool->hp_mempool)) { + WD_ERR("failed to create hugepage mempool.\n"); + goto error; + } + + /* Create memory blocks */ + pool->hp_blkpool = wd_blockpool_create(pool->hp_mempool, + setup->block_size, + setup->block_num); + if (WD_IS_ERR(pool->hp_blkpool)) { + WD_ERR("failed to create hugepage blockpool.\n"); + wd_mempool_destroy(pool->hp_mempool); + goto error; + } + + pool->free_blk_num = setup->block_num; + pool->act_blk_sz = setup->block_size; + + return pool; +error: + free(pool); + return NULL; +} + +static void wd_hugepage_pool_destroy(struct wd_blkpool *p) +{ + if (p->hp_blkpool) { + wd_blockpool_destroy(p->hp_blkpool); + p->hp_blkpool = 0; + } + + if (p->hp_mempool) { + wd_mempool_destroy(p->hp_mempool); + p->hp_mempool = 0; + } + + free(p); +} + +static void *wd_hugepage_blk_alloc(struct wd_blkpool *p, size_t size) +{ + if (size > p->act_blk_sz) { + WD_ERR("request size %zu > block size %u\n", size, p->act_blk_sz); + return NULL; + } + + void *addr = 
wd_block_alloc(p->hp_blkpool); + if (!addr) { + p->alloc_failures++; + WD_ERR("failed to alloc block from hugepage pool.\n"); + return NULL; + } + + __atomic_fetch_sub(&p->free_blk_num, 1, __ATOMIC_RELAXED); + return addr; +} + +static void wd_hugepage_blk_free(struct wd_blkpool *p, void *buf) +{ + /* The function call ensures that buf is not null */ + wd_block_free(p->hp_blkpool, buf); + __atomic_fetch_add(&p->free_blk_num, 1, __ATOMIC_RELAXED); +} + +/*----------------------------------No-SVA kernel memory pool--------------------------------*/ static void *wd_mmap_qfr(struct ctx_info *cinfo, enum uacce_qfrt qfrt, size_t size) { off_t off; @@ -634,29 +756,6 @@ static int usr_pool_init(struct wd_blkpool *p) return WD_SUCCESS; } -static int wd_parse_dev_id(char *dev_name) -{ - char *last_dash = NULL; - char *endptr; - int dev_id; - - if (!dev_name) - return -WD_EINVAL; - - /* Find the last '-' in the string. */ - last_dash = strrchr(dev_name, '-'); - if (!last_dash || *(last_dash + 1) == '\0') - return -WD_EINVAL; - - /* Parse the following number */ - dev_id = strtol(last_dash + 1, &endptr, DECIMAL_NUMBER); - /* Check whether it is truly all digits */ - if (*endptr != '\0' || dev_id < 0) - return -WD_EINVAL; - - return dev_id; -} - static int wd_mempool_init(handle_t h_ctx, struct wd_blkpool *pool, struct wd_mempool_setup *setup) { @@ -728,8 +827,8 @@ void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup) WD_ERR("failed to check device ctx!\n"); return NULL; } else if (ret == UACCE_DEV_SVA) { - WD_ERR("the device is SVA mode!\n"); - return NULL; + WD_INFO("the device is SVA mode!\n"); + return wd_hugepage_pool_create(h_ctx, setup); } pool = calloc(1, sizeof(*pool)); @@ -742,6 +841,7 @@ void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup) goto err_pool_alloc; memcpy(&pool->setup, setup, sizeof(pool->setup)); + pool->sva_mode = false; ret = wd_pool_pre_layout(h_ctx, pool, setup); if (ret) @@ -776,6 +876,11 @@ void wd_mempool_free(handle_t h_ctx, void *pool) return; } + if (p->sva_mode) { + wd_hugepage_pool_destroy(p); + return; + } + setup = &p->setup; if (p->free_blk_num != setup->block_num) { WD_ERR("Can not destroy blk pool, as it's in use.\n"); @@ -819,6 +924,11 @@ void wd_mem_free(void *pool, void *buf) return; } + if (p->sva_mode) { + wd_hugepage_blk_free(p, buf); + return; + } + sz = p->act_hd_sz + p->act_blk_sz; if (!sz) { WD_ERR("memory pool blk size is zero!\n"); @@ -922,6 +1032,9 @@ void *wd_mem_alloc(void *pool, size_t size) return NULL; } + if (p->sva_mode) + return wd_hugepage_blk_alloc(p, size); + if (!p->act_blk_sz) { WD_ERR("blk pool is error!\n"); return NULL; @@ -972,6 +1085,10 @@ void *wd_mem_map(void *pool, void *buf, size_t sz) return NULL; } + /* VA == IOVA in SVA mode */ + if (p->sva_mode) + return buf; + if (!sz || (uintptr_t)buf < (uintptr_t)p->act_start) { WD_ERR("map buf addr is error.\n"); return NULL; -- 2.33.0
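After the merge a caller no longer branches on the bus mode: wd_mempool_alloc() returns a hugepage-backed pool on SVA devices and a kernel-reserved pool otherwise, and the same wd_mem_alloc()/wd_mem_map()/wd_mem_free() calls work against either backend. A hedged usage sketch; h_ctx is assumed to be a ctx already requested from a uacce device, the block sizes are illustrative, and wd_mempool_setup may carry more fields than shown:

#include "wd.h"
#include "wd_bmm.h"

static int demo_unified_pool(handle_t h_ctx)
{
        struct wd_mempool_setup setup = {
                .block_size = 4096,     /* illustrative values */
                .block_num = 128,
        };
        void *pool, *buf, *dma;

        /* Hugepage pool on an SVA device, kernel-reserved pool otherwise. */
        pool = wd_mempool_alloc(h_ctx, &setup);
        if (!pool)
                return -1;

        buf = wd_mem_alloc(pool, setup.block_size);     /* same call, either backend */
        if (!buf) {
                wd_mempool_free(h_ctx, pool);
                return -1;
        }

        dma = wd_mem_map(pool, buf, setup.block_size);  /* == buf under SVA, IOVA under No-SVA */
        /* ... pass buf (CPU view) and dma (device view) to the driver ... */
        (void)dma;

        wd_mem_free(pool, buf);
        wd_mempool_free(h_ctx, pool);
        return 0;
}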
From: Wenkai Lin <linwenkai6@hisilicon.com> Initialize the algorithm names supported by each module. During subsequent initialization, this will be used to determine whether the algorithm executed by the user is supported. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> --- wd_aead.c | 1 + wd_agg.c | 1 + wd_cipher.c | 1 + wd_comp.c | 1 + wd_dh.c | 1 + wd_digest.c | 1 + wd_ecc.c | 1 + wd_join_gather.c | 1 + wd_rsa.c | 1 + wd_udma.c | 1 + 10 files changed, 10 insertions(+) diff --git a/wd_aead.c b/wd_aead.c index b10890f..373b6fe 100644 --- a/wd_aead.c +++ b/wd_aead.c @@ -446,6 +446,7 @@ static int wd_aead_init_nolock(struct wd_ctx_config *config, struct wd_sched *sc if (ret < 0) return ret; + wd_aead_setting.config.alg_name = "aead"; ret = wd_init_ctx_config(&wd_aead_setting.config, config); if (ret) return ret; diff --git a/wd_agg.c b/wd_agg.c index 85fb7f6..8c54d10 100644 --- a/wd_agg.c +++ b/wd_agg.c @@ -580,6 +580,7 @@ static int wd_agg_alg_init(struct wd_ctx_config *config, struct wd_sched *sched) if (ret < 0) return ret; + wd_agg_setting.config.alg_name = "hashagg"; ret = wd_init_ctx_config(&wd_agg_setting.config, config); if (ret < 0) return ret; diff --git a/wd_cipher.c b/wd_cipher.c index ef54dc8..92ca07b 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -328,6 +328,7 @@ static int wd_cipher_common_init(struct wd_ctx_config *config, if (ret < 0) return ret; + wd_cipher_setting.config.alg_name = "cipher"; ret = wd_init_ctx_config(&wd_cipher_setting.config, config); if (ret < 0) return ret; diff --git a/wd_comp.c b/wd_comp.c index ac0470f..435f5a8 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -154,6 +154,7 @@ static int wd_comp_init_nolock(struct wd_ctx_config *config, struct wd_sched *sc if (ret < 0) return ret; + wd_comp_setting.config.alg_name = "zlib gzip deflate lz77_zstd lz4 lz77_only"; ret = wd_init_ctx_config(&wd_comp_setting.config, config); if (ret < 0) return ret; diff --git a/wd_dh.c b/wd_dh.c index 2c88ce2..221322f 100644 --- a/wd_dh.c +++ b/wd_dh.c @@ -125,6 +125,7 @@ static int wd_dh_common_init(struct wd_ctx_config *config, struct wd_sched *sche if (ret < 0) return ret; + wd_dh_setting.config.alg_name = "dh"; ret = wd_init_ctx_config(&wd_dh_setting.config, config); if (ret) return ret; diff --git a/wd_digest.c b/wd_digest.c index cb0a111..2d31176 100644 --- a/wd_digest.c +++ b/wd_digest.c @@ -260,6 +260,7 @@ static int wd_digest_init_nolock(struct wd_ctx_config *config, if (ret < 0) return ret; + wd_digest_setting.config.alg_name = "digest"; ret = wd_init_ctx_config(&wd_digest_setting.config, config); if (ret < 0) return ret; diff --git a/wd_ecc.c b/wd_ecc.c index 2a1228d..b1971b9 100644 --- a/wd_ecc.c +++ b/wd_ecc.c @@ -192,6 +192,7 @@ static int wd_ecc_common_init(struct wd_ctx_config *config, struct wd_sched *sch if (ret < 0) return ret; + wd_ecc_setting.config.alg_name = "sm2 x448 x25519 ecdsa ecdh"; ret = wd_init_ctx_config(&wd_ecc_setting.config, config); if (ret < 0) return ret; diff --git a/wd_join_gather.c b/wd_join_gather.c index a845e03..915c1b8 100644 --- a/wd_join_gather.c +++ b/wd_join_gather.c @@ -684,6 +684,7 @@ static int wd_join_gather_alg_init(struct wd_ctx_config *config, struct wd_sched if (ret < 0) return ret; + wd_join_gather_setting.config.alg_name = "hashjoin gather"; ret = wd_init_ctx_config(&wd_join_gather_setting.config, config); if (ret < 0) return ret; diff --git a/wd_rsa.c b/wd_rsa.c index 4cb72fc..cf9239c 100644 --- a/wd_rsa.c +++ b/wd_rsa.c @@ -166,6 +166,7 @@ static int 
wd_rsa_common_init(struct wd_ctx_config *config, struct wd_sched *sch if (ret < 0) return ret; + wd_rsa_setting.config.alg_name = "rsa"; ret = wd_init_ctx_config(&wd_rsa_setting.config, config); if (ret < 0) return ret; diff --git a/wd_udma.c b/wd_udma.c index 5f47291..eebe495 100644 --- a/wd_udma.c +++ b/wd_udma.c @@ -390,6 +390,7 @@ static int wd_udma_alg_init(struct wd_ctx_config *config, struct wd_sched *sched if (ret < 0) return ret; + wd_udma_setting.config.alg_name = "udma"; ret = wd_init_ctx_config(&wd_udma_setting.config, config); if (ret < 0) return ret; -- 2.33.0
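Each alg_name set above is a space-separated token list, e.g. "zlib gzip deflate lz77_zstd lz4 lz77_only". The support check mentioned in the commit message is not part of this patch; a plausible whole-token match is sketched below (an assumption, not the library's actual routine), since a plain strstr() would wrongly accept "lz77" against "lz77_zstd":

#include <stdbool.h>
#include <string.h>

/* True if alg appears as a whole space-separated token in list. */
static bool alg_supported(const char *list, const char *alg)
{
        size_t len = strlen(alg);
        const char *p = list;

        if (!len)
                return false;

        while ((p = strstr(p, alg)) != NULL) {
                bool head = (p == list) || (p[-1] == ' ');
                bool tail = (p[len] == '\0') || (p[len] == ' ');

                if (head && tail)
                        return true;
                p++;    /* keep scanning past a partial match */
        }
        return false;
}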
From: Chenghai Huang <huangchenghai2@huawei.com> Address memory management issues in failure paths, including double-free and use-after-free problems. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/zip_wd_benchmark.c | 36 ++++++++++++++------------ 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index ab9a894..c12edba 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -292,7 +292,7 @@ static int init_zip_wd_queue(struct acc_option *options) u32 outsize; u32 insize; u8 op_type; - int i, j; + int i, j, k; int ret = 0; op_type = options->optype % WCRYPTO_DIR_MAX; @@ -356,22 +356,22 @@ static int init_zip_wd_queue(struct acc_option *options) goto bds_error; bds = g_thread_queue.bd_res[j].bds; - for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { - bds[i].src = wd_alloc_blk(pool); - if (!bds[i].src) { + for (k = 0; k < MAX_POOL_LENTH_COMP; k++) { + bds[k].src = wd_alloc_blk(pool); + if (!bds[k].src) { ret = -ENOMEM; goto blk_error2; } - bds[i].src_len = insize; + bds[k].src_len = insize; - bds[i].dst = wd_alloc_blk(pool); - if (!bds[i].dst) { + bds[k].dst = wd_alloc_blk(pool); + if (!bds[k].dst) { ret = -ENOMEM; goto blk_error3; } - bds[i].dst_len = outsize; + bds[k].dst_len = outsize; - get_rand_data(bds[i].src, insize * COMPRESSION_RATIO_FACTOR); + get_rand_data(bds[k].src, insize * COMPRESSION_RATIO_FACTOR); } } @@ -379,11 +379,11 @@ static int init_zip_wd_queue(struct acc_option *options) return 0; blk_error3: - wd_free_blk(pool, bds[i].src); + wd_free_blk(pool, bds[k].src); blk_error2: - for (i--; i >= 0; i--) { - wd_free_blk(pool, bds[i].src); - wd_free_blk(pool, bds[i].dst); + for (k--; k >= 0; k--) { + wd_free_blk(pool, bds[k].src); + wd_free_blk(pool, bds[k].dst); } bds_error: wd_blkpool_destroy(g_thread_queue.bd_res[j].pool); @@ -391,9 +391,9 @@ pool_err: for (j--; j >= 0; j--) { pool = g_thread_queue.bd_res[j].pool; bds = g_thread_queue.bd_res[j].bds; - for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { - wd_free_blk(pool, bds[i].src); - wd_free_blk(pool, bds[i].dst); + for (k = 0; k < MAX_POOL_LENTH_COMP; k++) { + wd_free_blk(pool, bds[k].src); + wd_free_blk(pool, bds[k].dst); } free(bds); wd_blkpool_destroy(pool); @@ -404,6 +404,7 @@ queue_out: free(g_thread_queue.bd_res[i].queue); } free(g_thread_queue.bd_res); + g_thread_queue.bd_res = NULL; return ret; } @@ -413,6 +414,9 @@ static void uninit_zip_wd_queue(void) void *pool = NULL; int j, i; + if (!g_thread_queue.bd_res) + return; + for (j = 0; j < g_thread_num; j++) { pool = g_thread_queue.bd_res[j].pool; bds = g_thread_queue.bd_res[j].bds; -- 2.33.0
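The pattern behind the fix above: an error path that frees a global must also clear the pointer, so a later teardown call degrades to a harmless no-op instead of a double free. In miniature, with illustrative names rather than the benchmark's real ones:

#include <stdlib.h>

static int *g_res;

static int init(void)
{
        g_res = calloc(16, sizeof(*g_res));
        if (!g_res)
                return -1;

        /* ... a later setup step fails ... */
        free(g_res);
        g_res = NULL;   /* without this, uninit() would free it again */
        return -1;
}

static void uninit(void)
{
        if (!g_res)     /* init failed or never ran: nothing to do */
                return;

        free(g_res);
        g_res = NULL;
}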
From: Wenkai Lin <linwenkai6@hisilicon.com> When using SGL memory with uadk, we need to convert the user SGL into a hardware SGL. For performance reasons, we create an SGL pool for each queue, from which SGLs are requested when performing tasks. In the no-sva scenario, to obtain the mapping between virtual and physical addresses, SGL memory needs to be allocated from the memory pool. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- drv/hisi_comp.c | 4 +- drv/hisi_qm_udrv.c | 140 +++++++++++++++++++++++++++++++++++---------- drv/hisi_qm_udrv.h | 6 +- drv/hisi_sec.c | 6 +- 4 files changed, 118 insertions(+), 38 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 001150e..87d2103 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -369,7 +369,7 @@ static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; } @@ -1426,7 +1426,7 @@ static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool to free hw sgl!\n"); return; diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 70a6698..9251b4c 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -76,8 +76,7 @@ struct hisi_sge { void *page_ctrl; __le32 len; __le32 pad; - __le32 pad0; - __le32 pad1; + uintptr_t vbuff; }; /* use default hw sgl head size 64B, in little-endian */ @@ -91,7 +90,8 @@ struct hisi_sgl { /* the sge num in this sgl */ __le16 entry_length_in_sgl; __le16 pad0; - __le64 pad1[5]; + __le64 pad1[4]; + struct hisi_sgl *next; /* valid sge buffs total size */ __le64 entry_size_in_sgl; struct hisi_sge sge_entries[]; @@ -107,6 +107,7 @@ struct hisi_sgl_pool { __u32 top; __u32 sge_num; __u32 sgl_num; + struct wd_mm_ops *mm_ops; pthread_spinlock_t lock; }; @@ -357,8 +358,16 @@ static int hisi_qm_setup_info(struct hisi_qp *qp, struct hisi_qm_priv *config) goto err_destroy_lock; } + ret = pthread_spin_init(&q_info->sgl_lock, PTHREAD_PROCESS_SHARED); + if (ret) { + WD_DEV_ERR(qp->h_ctx, "failed to init qinfo sgl_lock!\n"); + goto err_destroy_sd_lock; + } + return 0; +err_destroy_sd_lock: + pthread_spin_destroy(&q_info->sd_lock); err_destroy_lock: pthread_spin_destroy(&q_info->rv_lock); err_out: @@ -370,6 +379,7 @@ static void hisi_qm_clear_info(struct hisi_qp *qp) { struct hisi_qm_queue_info *q_info = &qp->q_info; + pthread_spin_destroy(&q_info->sgl_lock); pthread_spin_destroy(&q_info->sd_lock); pthread_spin_destroy(&q_info->rv_lock); hisi_qm_unset_region(qp->h_ctx, q_info); @@ -413,7 +423,7 @@ handle_t hisi_qm_alloc_qp(struct hisi_qm_priv *config, handle_t ctx) goto out_qp; qp->h_sgl_pool = hisi_qm_create_sglpool(HISI_SGL_NUM_IN_BD, - HISI_SGE_NUM_IN_SGL); + HISI_SGE_NUM_IN_SGL, NULL); if (!qp->h_sgl_pool) goto free_info; @@ -451,6 +461,8 @@ void hisi_qm_free_qp(handle_t h_qp) wd_release_ctx_force(qp->h_ctx); hisi_qm_destroy_sglpool(qp->h_sgl_pool); + if (qp->h_nosva_sgl_pool) + hisi_qm_destroy_sglpool(qp->h_nosva_sgl_pool); hisi_qm_clear_info(qp); @@ -630,32 +642,46 @@ void hisi_set_msg_id(handle_t h_qp, __u32 *tag) } } -static void *hisi_qm_create_sgl(__u32 sge_num) +static void *hisi_qm_create_sgl(__u32 sge_num, struct wd_mm_ops *mm_ops) { void *sgl; int size; size = sizeof(struct 
hisi_sgl) + sge_num * (sizeof(struct hisi_sge)) + HISI_SGL_ALIGE; - sgl = calloc(1, size); - if (!sgl) { - WD_ERR("failed to create sgl!\n"); - return NULL; - } + if (mm_ops) + sgl = mm_ops->alloc(mm_ops->usr, size); + else + sgl = calloc(1, size); + + if (!sgl) + WD_ERR("failed to alloc memory for the hisi qm sgl!\n"); return sgl; } -static struct hisi_sgl *hisi_qm_align_sgl(const void *sgl, __u32 sge_num) +static struct hisi_sgl *hisi_qm_align_sgl(const void *sgl, __u32 sge_num, + struct wd_mm_ops *mm_ops) { struct hisi_sgl *sgl_align; + uintptr_t iova, iova_align; /* Hardware require the address must be 64 bytes aligned */ - sgl_align = (struct hisi_sgl *)ADDR_ALIGN_64(sgl); + if (mm_ops) { + iova = (uintptr_t)mm_ops->iova_map(mm_ops->usr, (struct hisi_sgl *)sgl, + sizeof(struct hisi_sgl)); + if (!iova) + return NULL; + iova_align = ADDR_ALIGN_64(iova); + sgl_align = (struct hisi_sgl *)((uintptr_t)sgl + iova_align - iova); + } else { + sgl_align = (struct hisi_sgl *)ADDR_ALIGN_64(sgl); + } sgl_align->entry_sum_in_chain = sge_num; sgl_align->entry_sum_in_sgl = 0; sgl_align->entry_length_in_sgl = sge_num; sgl_align->next_dma = 0; + sgl_align->next = 0; return sgl_align; } @@ -665,18 +691,27 @@ static void hisi_qm_free_sglpool(struct hisi_sgl_pool *pool) __u32 i; if (pool->sgl) { - for (i = 0; i < pool->sgl_num; i++) - free(pool->sgl[i]); + if (pool->mm_ops && !pool->mm_ops->sva_mode) { + for (i = 0; i < pool->sgl_num; i++) + pool->mm_ops->free(pool->mm_ops->usr, pool->sgl[i]); + } else { + for (i = 0; i < pool->sgl_num; i++) + free(pool->sgl[i]); + } free(pool->sgl); } if (pool->sgl_align) free(pool->sgl_align); + + if (pool->mm_ops) + free(pool->mm_ops); + free(pool); } -handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num) +handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num, struct wd_mm_ops *mm_ops) { struct hisi_sgl_pool *sgl_pool; int ret; @@ -694,6 +729,16 @@ handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num) return 0; } + if (mm_ops && !mm_ops->sva_mode && mm_ops->alloc && mm_ops->free && + mm_ops->iova_map && mm_ops->usr) { + sgl_pool->mm_ops = malloc(sizeof(struct wd_mm_ops)); + if (!sgl_pool->mm_ops) { + WD_ERR("failed to alloc memory for sglpool mm_ops!\n"); + goto err_out; + } + memcpy(sgl_pool->mm_ops, mm_ops, sizeof(struct wd_mm_ops)); + } + sgl_pool->sgl = calloc(sgl_num, sizeof(void *)); if (!sgl_pool->sgl) { WD_ERR("failed to alloc memory for sgl!\n"); @@ -708,14 +753,14 @@ handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num) /* base the sgl_num create the sgl chain */ for (i = 0; i < sgl_num; i++) { - sgl_pool->sgl[i] = hisi_qm_create_sgl(sge_num); + sgl_pool->sgl[i] = hisi_qm_create_sgl(sge_num, sgl_pool->mm_ops); if (!sgl_pool->sgl[i]) { sgl_pool->sgl_num = i; goto err_out; } sgl_pool->sgl_align[i] = hisi_qm_align_sgl(sgl_pool->sgl[i], - sge_num); + sge_num, sgl_pool->mm_ops); } sgl_pool->sgl_num = sgl_num; @@ -756,8 +801,8 @@ static struct hisi_sgl *hisi_qm_sgl_pop(struct hisi_sgl_pool *pool) pthread_spin_lock(&pool->lock); if (pool->top == 0) { - WD_ERR("invalid: the sgl pool is empty!\n"); pthread_spin_unlock(&pool->lock); + WD_ERR("invalid: the sgl pool is empty!\n"); return NULL; } @@ -771,12 +816,13 @@ static int hisi_qm_sgl_push(struct hisi_sgl_pool *pool, struct hisi_sgl *hw_sgl) { pthread_spin_lock(&pool->lock); if (pool->top >= pool->depth) { - WD_ERR("invalid: the sgl pool is full!\n"); pthread_spin_unlock(&pool->lock); + WD_ERR("invalid: the sgl pool is full!\n"); return -WD_EINVAL; } hw_sgl->next_dma = 0; + 
hw_sgl->next = 0; hw_sgl->entry_sum_in_sgl = 0; hw_sgl->entry_sum_in_chain = pool->sge_num; hw_sgl->entry_length_in_sgl = pool->sge_num; @@ -800,7 +846,7 @@ void hisi_qm_put_hw_sgl(handle_t sgl_pool, void *hw_sgl) return; while (cur) { - next = (struct hisi_sgl *)cur->next_dma; + next = (struct hisi_sgl *)cur->next; ret = hisi_qm_sgl_push(pool, cur); if (ret) break; @@ -832,7 +878,7 @@ static void hisi_qm_dump_sgl(void *sgl) WD_DEBUG("[sgl-%d]->sge_entries[%d].len: %u\n", k, i, tmp->sge_entries[i].len); - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; k++; if (!tmp) { @@ -847,6 +893,7 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) struct hisi_sgl_pool *pool = (struct hisi_sgl_pool *)sgl_pool; struct wd_datalist *tmp = sgl; struct hisi_sgl *head, *next, *cur; + struct wd_mm_ops *mm_ops; __u32 i = 0; if (!pool || !sgl) { @@ -854,6 +901,12 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) return NULL; } + if (pool->mm_ops && !pool->mm_ops->iova_map) { + WD_ERR("invalid: mm_ops iova_map function is NULL!\n"); + return NULL; + } + + mm_ops = pool->mm_ops; head = hisi_qm_sgl_pop(pool); if (!head) return NULL; @@ -872,7 +925,18 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) goto err_out; } - cur->sge_entries[i].buff = (uintptr_t)tmp->data; + if (mm_ops) + cur->sge_entries[i].buff = (uintptr_t)mm_ops->iova_map(mm_ops->usr, + tmp->data, tmp->len); + else + cur->sge_entries[i].buff = (uintptr_t)tmp->data; + + if (!cur->sge_entries[i].buff) { + WD_ERR("invalid: the iova map addr of sge is NULL!\n"); + goto err_out; + } + + cur->sge_entries[i].vbuff = (uintptr_t)tmp->data; cur->sge_entries[i].len = tmp->len; cur->entry_sum_in_sgl++; cur->entry_size_in_sgl += tmp->len; @@ -890,7 +954,12 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) WD_ERR("invalid: the sgl pool is not enough!\n"); goto err_out; } - cur->next_dma = (uintptr_t)next; + if (mm_ops) + cur->next_dma = (uintptr_t)mm_ops->iova_map(mm_ops->usr, + next, sizeof(*next)); + else + cur->next_dma = (uintptr_t)next; + cur->next = next; cur = next; head->entry_sum_in_chain += pool->sge_num; /* In the new sgl chain, the subscript must be reset */ @@ -912,10 +981,19 @@ err_out: return NULL; } -handle_t hisi_qm_get_sglpool(handle_t h_qp) +handle_t hisi_qm_get_sglpool(handle_t h_qp, struct wd_mm_ops *mm_ops) { struct hisi_qp *qp = (struct hisi_qp *)h_qp; + if (mm_ops && !mm_ops->sva_mode) { + pthread_spin_lock(&qp->q_info.sgl_lock); + if (!qp->h_nosva_sgl_pool) + qp->h_nosva_sgl_pool = hisi_qm_create_sglpool(HISI_SGL_NUM_IN_BD, + HISI_SGE_NUM_IN_SGL, mm_ops); + pthread_spin_unlock(&qp->q_info.sgl_lock); + return qp->h_nosva_sgl_pool; + } + return qp->h_sgl_pool; } @@ -927,7 +1005,7 @@ static void hisi_qm_sgl_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, __u32 offset; void *src; - src = (void *)tmp->sge_entries[begin_sge].buff + sge_offset; + src = (void *)tmp->sge_entries[begin_sge].vbuff + sge_offset; offset = tmp->sge_entries[begin_sge].len - sge_offset; /* the first one is enough for copy size, copy and return */ if (offset >= size) { @@ -939,7 +1017,7 @@ static void hisi_qm_sgl_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, while (tmp) { for (; i < tmp->entry_sum_in_sgl; i++) { - src = (void *)tmp->sge_entries[i].buff; + src = (void *)tmp->sge_entries[i].vbuff; if (offset + tmp->sge_entries[i].len >= size) { memcpy(pbuff + offset, src, size - offset); return; @@ -949,7 +1027,7 @@ static void hisi_qm_sgl_copy_inner(void 
*pbuff, struct hisi_sgl *hw_sgl, offset += tmp->sge_entries[i].len; } - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; i = 0; } } @@ -964,14 +1042,14 @@ static void hisi_qm_pbuff_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, void *dst; if (tmp->sge_entries[begin_sge].len - sge_offset >= size) { - dst = (void *)tmp->sge_entries[begin_sge].buff + sge_offset; + dst = (void *)tmp->sge_entries[begin_sge].vbuff + sge_offset; memcpy(dst, pbuff, size); return; } while (tmp) { for (; i < tmp->entry_sum_in_sgl; i++) { - dst = (void *)tmp->sge_entries[i].buff; + dst = (void *)tmp->sge_entries[i].vbuff; if (offset + tmp->sge_entries[i].len >= size) { memcpy(dst, pbuff + offset, size - offset); return; @@ -981,7 +1059,7 @@ static void hisi_qm_pbuff_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, offset += tmp->sge_entries[i].len; } - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; i = 0; } } @@ -1000,7 +1078,7 @@ void hisi_qm_sgl_copy(void *pbuff, void *hw_sgl, __u32 offset, __u32 size, while (len + tmp->entry_size_in_sgl <= offset) { len += tmp->entry_size_in_sgl; - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; if (!tmp) return; } diff --git a/drv/hisi_qm_udrv.h b/drv/hisi_qm_udrv.h index e787193..f066881 100644 --- a/drv/hisi_qm_udrv.h +++ b/drv/hisi_qm_udrv.h @@ -81,6 +81,7 @@ struct hisi_qm_queue_info { pthread_spinlock_t rv_lock; unsigned long region_size[UACCE_QFRT_MAX]; bool epoll_en; + pthread_spinlock_t sgl_lock; }; struct hisi_qp { @@ -89,6 +90,7 @@ struct hisi_qp { handle_t h_ctx; /* Private area for driver use, point to queue specifial data */ void *priv; + handle_t h_nosva_sgl_pool; }; /* Capabilities */ @@ -148,7 +150,7 @@ void hisi_set_msg_id(handle_t h_qp, __u32 *tag); * * Fixed me: the sge buff's size now is Fixed. */ -handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num); +handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num, struct wd_mm_ops *mm_ops); /** * hisi_qm_destroy_sglpool - Destroy sgl pool in qm. @@ -176,7 +178,7 @@ void hisi_qm_put_hw_sgl(handle_t sgl_pool, void *hw_sgl); * hisi_qm_get_sglpool - Get the qp's hw sgl pool handle * @h_qp: Handle of the qp. */ -handle_t hisi_qm_get_sglpool(handle_t h_qp); +handle_t hisi_qm_get_sglpool(handle_t h_qp, struct wd_mm_ops *mm_ops); /** * hisi_qm_sgl_copy: Buffer copying from hw sgl to pbuff or pbuff to sgl diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 1e95ce5..246edf4 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -1028,7 +1028,7 @@ static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (!h_sgl_pool) return; @@ -1045,7 +1045,7 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (!h_sgl_pool) { WD_ERR("failed to get sglpool for hw_v2!\n"); return -WD_EINVAL; @@ -1085,7 +1085,7 @@ static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (!h_sgl_pool) { WD_ERR("failed to get sglpool for hw_v3!\n"); return -WD_EINVAL; -- 2.33.0
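After this patch each hardware SGL carries two links: next_dma holds the IOVA the device follows, while the new next field keeps the virtual address so the CPU can still walk the chain when VA != IOVA (the No-SVA case). A reduced sketch of the dual-link idea, with types simplified from the driver's struct hisi_sgl and struct hisi_sge:

struct sge {
        unsigned long long buff;        /* IOVA: what the device dereferences */
        unsigned long long vbuff;       /* VA: what the CPU dereferences */
        unsigned int len;
};

struct sgl {
        unsigned long long next_dma;    /* IOVA of the next sgl, for hardware */
        struct sgl *next;               /* VA of the next sgl, for the CPU */
        unsigned int n;                 /* valid entries in sge[] */
        struct sge sge[4];
};

/*
 * A CPU-side walk must chase ->next, never ->next_dma: in No-SVA
 * mode the IOVA is not a dereferenceable pointer.
 */
static unsigned long sgl_total_len(const struct sgl *s)
{
        unsigned long total = 0;
        unsigned int i;

        for (; s; s = s->next)
                for (i = 0; i < s->n; i++)
                        total += s->sge[i].len;

        return total;
}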
From: Wenkai Lin <linwenkai6@hisilicon.com> The h_qp may be NULL, so check the pointer before using it. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_common.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drv/hisi_dae_common.c b/drv/hisi_dae_common.c index 43b53e0..5cfc105 100644 --- a/drv/hisi_dae_common.c +++ b/drv/hisi_dae_common.c @@ -357,8 +357,10 @@ free_h_qp: out: for (j = 0; j < i; j++) { h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[j].ctx); - dae_uninit_qp_priv(h_qp); - hisi_qm_free_qp(h_qp); + if (h_qp) { + dae_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } } free(priv); return ret; @@ -378,8 +380,10 @@ void dae_exit(struct wd_alg_driver *drv) config = &priv->config; for (i = 0; i < config->ctx_num; i++) { h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); - dae_uninit_qp_priv(h_qp); - hisi_qm_free_qp(h_qp); + if (h_qp) { + dae_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } } free(priv); -- 2.33.0
From: Wenkai Lin <linwenkai6@hisilicon.com> Modify GATHER_ROW_BATCH_EMPTY_SIZE from 2 to 4 for hardware memory alignment. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_join_gather.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drv/hisi_dae_join_gather.c b/drv/hisi_dae_join_gather.c index dcbeed4..f3eeb95 100644 --- a/drv/hisi_dae_join_gather.c +++ b/drv/hisi_dae_join_gather.c @@ -29,7 +29,7 @@ #define HASH_TABLE_MAX_INDEX_NUM 15 #define HASH_TABLE_INDEX_SIZE 12 #define HASH_TABLE_EMPTY_SIZE 4 -#define GATHER_ROW_BATCH_EMPTY_SIZE 2 +#define GATHER_ROW_BATCH_EMPTY_SIZE 4 /* DAE hardware protocol data */ enum dae_join_stage { -- 2.33.0
From: Wenkai Lin <linwenkai6@hisilicon.com> The batch number is needed for the hardware to get all addresses from the row batches. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_join_gather.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drv/hisi_dae_join_gather.c b/drv/hisi_dae_join_gather.c index f3eeb95..9b9c07d 100644 --- a/drv/hisi_dae_join_gather.c +++ b/drv/hisi_dae_join_gather.c @@ -138,6 +138,7 @@ static void fill_join_gather_misc_field(struct wd_join_gather_msg *msg, sqe->task_type_ext = DAE_GATHER_COMPLETE; sqe->multi_batch_en = msg->multi_batch_en; sqe->data_row_num = msg->req.output_row_num; + sqe->batch_num = msg->req.gather_req.row_batchs.batch_num; if (msg->index_type == WD_BATCH_ADDR_INDEX) sqe->index_batch_type = DAE_GATHER_BATCH_ADDR_INDEX; else -- 2.33.0
From: Zhushuai Yin <yinzhushuai@huawei.com> The SEC v2 interface needs to support the no-sva business model within the new evolution framework. This ensures that users switching to the v2 interface can utilize both the sva mode and the no-sva mode. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_sec.c | 1081 ++++++++++++++++++++++++++++++----- include/drv/wd_aead_drv.h | 23 +- include/drv/wd_cipher_drv.h | 2 + include/drv/wd_digest_drv.h | 2 + include/wd_aead.h | 2 + include/wd_cipher.h | 2 + include/wd_digest.h | 2 + wd_aead.c | 179 +++++- wd_cipher.c | 40 +- wd_digest.c | 42 +- 10 files changed, 1207 insertions(+), 168 deletions(-) diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 246edf4..0074952 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -91,6 +91,7 @@ #define AUTH_ALG_OFFSET 11 #define WD_CIPHER_THEN_DIGEST 0x0 #define WD_DIGEST_THEN_CIPHER 0x1 +#define AEAD_AIV_OFFSET 0x6 #define SEC_CTX_Q_NUM_DEF 1 @@ -602,6 +603,157 @@ static int hisi_sec_get_usage(void *param) return 0; } +static int eops_param_check(struct wd_alg_driver *drv, struct wd_mm_ops *mm_ops) +{ + if (!drv || !drv->priv) { + WD_ERR("invalid: aead drv or priv is NULL!\n"); + return -WD_EINVAL; + } + + if (!mm_ops) { + WD_ERR("invalid: mm_ops is NULL!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int aead_sess_eops_init(struct wd_alg_driver *drv, + struct wd_mm_ops *mm_ops, void **params) +{ + struct wd_aead_aiv_addr *aiv_addr; + struct hisi_sec_ctx *sec_ctx; + struct hisi_qp *qp; + __u16 sq_depth; + int ret; + + ret = eops_param_check(drv, mm_ops); + if (ret) + return ret; + + if (!params) { + WD_ERR("invalid: extend ops init params address is NULL!\n"); + return -WD_EINVAL; + } + + if (*params) { + WD_ERR("invalid: extend ops init params repeatedly!\n"); + return -WD_EINVAL; + } + + aiv_addr = calloc(1, sizeof(struct wd_aead_aiv_addr)); + if (!aiv_addr) { + WD_ERR("aead failed to alloc aiv_addr memory!\n"); + return -WD_ENOMEM; + } + + sec_ctx = (struct hisi_sec_ctx *)drv->priv; + qp = (struct hisi_qp *)wd_ctx_get_priv(sec_ctx->config.ctxs[0].ctx); + sq_depth = qp->q_info.sq_depth; + aiv_addr->aiv = mm_ops->alloc(mm_ops->usr, (__u32)sq_depth << AEAD_AIV_OFFSET); + if (!aiv_addr->aiv) { + WD_ERR("aead failed to alloc aiv memory!\n"); + goto aiv_err; + } + memset(aiv_addr->aiv, 0, (__u32)sq_depth << AEAD_AIV_OFFSET); + if (!mm_ops->sva_mode) { + aiv_addr->aiv_nosva = mm_ops->iova_map(mm_ops->usr, aiv_addr->aiv, + (__u32)sq_depth << AEAD_AIV_OFFSET); + if (!aiv_addr->aiv_nosva) + goto aiv_nosva_err; + } + + aiv_addr->aiv_status = calloc(1, sq_depth); + if (!aiv_addr->aiv_status) { + WD_ERR("aead failed to alloc aiv_status memory!\n"); + goto aiv_status_err; + } + + *params = aiv_addr; + + return WD_SUCCESS; + +aiv_status_err: + if (!mm_ops->sva_mode) + mm_ops->iova_unmap(mm_ops->usr, aiv_addr->aiv, (void *)aiv_addr->aiv_nosva, + (__u32)sq_depth << AEAD_AIV_OFFSET); +aiv_nosva_err: + mm_ops->free(mm_ops->usr, aiv_addr->aiv); +aiv_err: + free(aiv_addr); + return -WD_ENOMEM; +} + +static void aead_sess_eops_uninit(struct wd_alg_driver *drv, + struct wd_mm_ops *mm_ops, void *params) +{ + struct wd_aead_aiv_addr *aiv_addr; + struct hisi_sec_ctx *sec_ctx; + struct hisi_qp *qp; + __u16 sq_depth; + int ret; + + ret = eops_param_check(drv, mm_ops); + if (ret) + return; + + if (!params) { + WD_ERR("invalid: extend ops uninit params address is NULL!\n"); + return; + } + + sec_ctx = (struct hisi_sec_ctx *)drv->priv; + qp = (struct hisi_qp 
*)wd_ctx_get_priv(sec_ctx->config.ctxs[0].ctx); + sq_depth = qp->q_info.sq_depth; + + aiv_addr = (struct wd_aead_aiv_addr *)params; + if (!mm_ops->sva_mode) + mm_ops->iova_unmap(mm_ops->usr, aiv_addr->aiv, (void *)aiv_addr->aiv_nosva, + (__u32)sq_depth << AEAD_AIV_OFFSET); + mm_ops->free(mm_ops->usr, aiv_addr->aiv); + free(aiv_addr->aiv_status); + free(params); +} + +static int aead_get_aiv_addr(struct hisi_qp *qp, struct wd_aead_msg *msg) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + __u16 sq_depth = qp->q_info.sq_depth; + int i; + + for (i = 0; i < sq_depth; i++) { + if (!__atomic_test_and_set(&aiv_addr->aiv_status[i], __ATOMIC_ACQUIRE)) { + msg->aiv = aiv_addr->aiv + i * AIV_STREAM_LEN; + return i; + } + } + + return -WD_EBUSY; +} + +static void aead_free_aiv_addr(struct wd_aead_msg *msg) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + __u32 aiv_idx; + + aiv_idx = (msg->aiv - aiv_addr->aiv) >> AEAD_AIV_OFFSET; + __atomic_clear(&aiv_addr->aiv_status[aiv_idx], __ATOMIC_RELEASE); +} + +static int sec_aead_get_extend_ops(void *ops) +{ + struct wd_aead_extend_ops *aead_ops = (struct wd_aead_extend_ops *)ops; + + if (!aead_ops) + return -WD_EINVAL; + + aead_ops->params = NULL; + aead_ops->eops_aiv_init = aead_sess_eops_init; + aead_ops->eops_aiv_uninit = aead_sess_eops_uninit; + + return WD_SUCCESS; +} + #define GEN_SEC_ALG_DRIVER(sec_alg_name, alg_type) \ {\ .drv_name = "hisi_sec2",\ @@ -616,6 +768,7 @@ static int hisi_sec_get_usage(void *param) .send = alg_type##_send,\ .recv = alg_type##_recv,\ .get_usage = hisi_sec_get_usage,\ + .get_extend_ops = sec_aead_get_extend_ops,\ } static struct wd_alg_driver cipher_alg_driver[] = { @@ -914,13 +1067,83 @@ static int fill_cipher_bd2_mode(struct wd_cipher_msg *msg, return 0; } -static void fill_cipher_bd2_addr(struct wd_cipher_msg *msg, - struct hisi_sec_sqe *sqe) +static void destroy_cipher_bd2_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe *sqe) { - sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->key; + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("cipher failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->type2.data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->type2.data_src_addr, + msg->in_bytes); + + if (sqe->type2.data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->type2.data_dst_addr, + msg->out_bytes); + + if (sqe->type2.c_key_addr) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->type2.c_key_addr, + msg->key_bytes); + + if (sqe->type2.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->type2.c_ivin_addr, + msg->iv_bytes); +} + +static int fill_cipher_bd2_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (mm_ops->sva_mode) { + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->key; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("cipher failed to check memory type!\n"); + return -WD_EINVAL; + } + 
+ /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->type2.data_src_addr = (__u64)(uintptr_t)phy_addr; + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)phy_addr; + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->key_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.c_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_cipher_bd2_addr(msg, sqe); + return -WD_ENOMEM; } static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, @@ -945,8 +1168,6 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_CIPHER; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->type2.data_dst_addr; temp_msg = wd_cipher_get_msg(qp->q_info.idx, tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -954,6 +1175,9 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; @@ -964,6 +1188,8 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, else update_iv_sgl(temp_msg); + destroy_cipher_bd2_addr(temp_msg, sqe); + if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "cipher"); } @@ -1024,11 +1250,12 @@ static int cipher_len_check(struct wd_cipher_msg *msg) return 0; } -static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out) +static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out, + struct wd_mm_ops *mm_ops) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, mm_ops); if (!h_sgl_pool) return; @@ -1038,19 +1265,12 @@ static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out) hisi_qm_put_hw_sgl(h_sgl_pool, out); } -static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, - struct hisi_sec_sqe *sqe, __u8 type) +static int hisi_sec_fill_sgl(handle_t h_sgl_pool, __u8 **in, __u8 **out, + struct hisi_sec_sqe *sqe, __u8 type) { - handle_t h_sgl_pool; void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); - if (!h_sgl_pool) { - WD_ERR("failed to get sglpool for hw_v2!\n"); - return -WD_EINVAL; - } - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v2!\n"); @@ -1078,19 +1298,12 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, return 0; } -static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, +static int hisi_sec_fill_sgl_v3(handle_t h_sgl_pool, __u8 **in, __u8 **out, struct hisi_sec_sqe3 *sqe, __u8 type) { - handle_t h_sgl_pool; void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); - if (!h_sgl_pool) { - WD_ERR("failed to get sglpool for hw_v3!\n"); - return -WD_EINVAL; 
- } - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v3!\n"); @@ -1165,6 +1378,7 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_cipher_msg *msg = wd_msg; struct hisi_sec_sqe sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -1179,7 +1393,13 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w return ret; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("cipher failed to get sglpool for hw_v2!\n"); + return -WD_EINVAL; + } + + ret = hisi_sec_fill_sgl(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; @@ -1188,7 +1408,11 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w hisi_set_msg_id(h_qp, &msg->tag); sqe.type2.clen_ivhlen |= (__u32)msg->in_bytes; sqe.type2.tag = (__u16)msg->tag; - fill_cipher_bd2_addr(msg, &sqe); + ret = fill_cipher_bd2_addr(msg, &sqe); + if (ret < 0) { + WD_ERR("cipher map memory is err(%d)!\n", ret); + return ret; + } ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (ret < 0) { @@ -1197,8 +1421,8 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w if (msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, - msg->out); - + msg->out, msg->mm_ops); + destroy_cipher_bd2_addr(msg, &sqe); return ret; } @@ -1225,7 +1449,7 @@ static int hisi_sec_cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *w if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -1316,13 +1540,83 @@ static int fill_cipher_bd3_mode(struct wd_cipher_msg *msg, return 0; } -static void fill_cipher_bd3_addr(struct wd_cipher_msg *msg, - struct hisi_sec_sqe3 *sqe) +static void destroy_cipher_bd3_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) { - sqe->data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->c_key_addr = (__u64)(uintptr_t)msg->key; + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("cipher failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->data_src_addr, + msg->in_bytes); + + if (sqe->data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->data_dst_addr, + msg->out_bytes); + + if (sqe->c_key_addr) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->c_key_addr, + msg->key_bytes); + + if (sqe->no_scene.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, + (void *)(uintptr_t)sqe->no_scene.c_ivin_addr, msg->iv_bytes); +} + +static int fill_cipher_bd3_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (mm_ops->sva_mode) { + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->c_key_addr = (__u64)(uintptr_t)msg->key; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + 
WD_ERR("cipher failed to check memory type!\n"); + return -WD_EINVAL; + } + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->data_src_addr = (__u64)(uintptr_t)phy_addr; + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->data_dst_addr = (__u64)(uintptr_t)phy_addr; + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->key_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->c_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_cipher_bd3_addr(msg, sqe); + return -WD_ENOMEM; } static int fill_cipher_bd3(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -1361,8 +1655,12 @@ static int fill_cipher_bd3(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) return 0; } -static void fill_sec_prefetch(__u8 data_fmt, __u32 len, __u16 hw_type, struct hisi_sec_sqe3 *sqe) +static void fill_sec_prefetch(__u8 data_fmt, __u32 len, __u16 hw_type, struct hisi_sec_sqe3 *sqe, + bool sva_mode) { + if (!sva_mode) + return; + if (hw_type >= HISI_QM_API_VER5_BASE || (data_fmt == WD_FLAT_BUF && len <= SEC_SVA_PREFETCH_MAX_LEN)) sqe->auth_mac_key |= (__u32)SEC_ENABLE_SVA_PREFETCH << SEC_SVA_PREFETCH_OFFSET; @@ -1374,6 +1672,7 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_cipher_msg *msg = wd_msg; struct hisi_sec_sqe3 sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -1387,10 +1686,16 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void if (ret) return ret; - fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe); + fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe, + msg->mm_ops->sva_mode); if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl_v3(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("cipher failed to get sglpool for hw_v3!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl_v3(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; @@ -1399,7 +1704,11 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void hisi_set_msg_id(h_qp, &msg->tag); sqe.c_len_ivin = (__u32)msg->in_bytes; sqe.tag = (__u64)(uintptr_t)msg->tag; - fill_cipher_bd3_addr(msg, &sqe); + ret = fill_cipher_bd3_addr(msg, &sqe); + if (ret < 0) { + WD_ERR("cipher map memory is err(%d)!\n", ret); + return ret; + } ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (ret < 0) { @@ -1408,8 +1717,8 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void if (msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, - msg->out); - + msg->out, msg->mm_ops); + destroy_cipher_bd3_addr(msg, &sqe); return ret; } @@ -1438,8 +1747,6 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_CIPHER; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->data_dst_addr; temp_msg = wd_cipher_get_msg(qp->q_info.idx, tag); 
if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -1447,6 +1754,9 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; @@ -1457,6 +1767,8 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, else update_iv_sgl(temp_msg); + destroy_cipher_bd3_addr(temp_msg, sqe); + if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "cipher"); } @@ -1481,7 +1793,7 @@ static int hisi_sec_cipher_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -1519,7 +1831,6 @@ static int fill_digest_bd2_alg(struct wd_digest_msg *msg, return -WD_EINVAL; } sqe->type2.mac_key_alg |= (__u32)BYTES_TO_WORDS(msg->key_bytes) << MAC_LEN_OFFSET; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->key; sqe->type2.mac_key_alg |= g_hmac_a_alg[msg->alg] << AUTH_ALG_OFFSET; @@ -1586,6 +1897,83 @@ static int fill_digest_long_hash(handle_t h_qp, struct wd_digest_msg *msg, return 0; } +static void destroy_digest_bd2_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("digest failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + + if (sqe->type2.data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->type2.data_src_addr, + msg->in_bytes); + + if (sqe->type2.mac_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->type2.mac_addr, + msg->out_bytes); + + if (sqe->type2.a_key_addr && msg->mode == WD_DIGEST_HMAC) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->type2.a_key_addr, + msg->key_bytes); +} + +static int fill_digest_bd2_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (mm_ops->sva_mode) { + /* avoid HW accessing address 0 when the pointer is NULL */ + if (msg->in) + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + else + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.mac_addr = (__u64)(uintptr_t)msg->out; + if (msg->mode == WD_DIGEST_HMAC) + sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->key; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("digest failed to check memory type!\n"); + return -WD_EINVAL; + } + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + if (msg->in_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->type2.data_src_addr = (__u64)(uintptr_t)phy_addr; + } + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.mac_addr = (__u64)(uintptr_t)phy_addr; + + if (msg->key_bytes != 0 && msg->mode == WD_DIGEST_HMAC) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_digest_bd2_addr(msg, sqe); + return -WD_ENOMEM; +} + static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, struct wd_digest_msg 
*recv_msg) { @@ -1606,7 +1994,6 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -1614,11 +2001,15 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } + destroy_digest_bd2_addr(temp_msg, sqe); + if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "digest"); } @@ -1719,6 +2110,7 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_digest_msg *msg = wd_msg; struct hisi_sec_sqe sqe; + handle_t h_sgl_pool; __u16 count = 0; __u8 scene; __u8 de; @@ -1743,7 +2135,12 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w de = DATA_DST_ADDR_DISABLE << SEC_DE_OFFSET; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("digest failed to get sglpool for hw_v2!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; @@ -1751,16 +2148,19 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w sqe.sds_sa_type |= (__u8)(de | scene); sqe.type2.alen_ivllen |= (__u32)msg->in_bytes; - sqe.type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe.type2.mac_addr = (__u64)(uintptr_t)msg->out; + ret = fill_digest_bd2_addr(msg, &sqe); + if (ret) { + WD_ERR("digest map memory is err(%d)!\n", ret); + goto put_sgl; + } ret = fill_digest_bd2_alg(msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; ret = fill_digest_long_hash(h_qp, msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.type2.tag = (__u16)msg->tag; @@ -1769,15 +2169,16 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w if (ret != -WD_EBUSY) WD_ERR("digest send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_digest_bd2_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -1801,7 +2202,7 @@ static int hisi_sec_digest_recv(struct wd_alg_driver *drv, handle_t ctx, void *w if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -1831,7 +2232,7 @@ static int hmac_key_len_check(struct wd_digest_msg *msg) } static int fill_digest_bd3_alg(struct wd_digest_msg *msg, - struct hisi_sec_sqe3 *sqe) + struct hisi_sec_sqe3 *sqe) { int ret; @@ -1866,13 +2267,11 @@ static int fill_digest_bd3_alg(struct wd_digest_msg *msg, return ret; sqe->auth_mac_key |= (__u32)BYTES_TO_WORDS(msg->key_bytes) << SEC_AKEY_OFFSET_V3; - sqe->a_key_addr = (__u64)(uintptr_t)msg->key; sqe->auth_mac_key |= g_hmac_a_alg[msg->alg] << SEC_AUTH_ALG_OFFSET_V3; if (msg->alg == WD_DIGEST_AES_GMAC) { 
sqe->auth_mac_key |= AI_GEN_IVIN_ADDR << SEC_AI_GEN_OFFSET_V3; - sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->iv; } } else { WD_ERR("failed to check digest mode, mode = %u\n", msg->mode); @@ -1956,12 +2355,104 @@ static void fill_digest_v3_scene(struct hisi_sec_sqe3 *sqe, sqe->bd_param |= (__u16)(de | scene); } +static void destroy_digest_bd3_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("digest failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + + if (sqe->data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->data_src_addr, + msg->in_bytes); + + if (sqe->mac_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->mac_addr, + msg->out_bytes); + + if (sqe->a_key_addr && msg->mode == WD_DIGEST_HMAC) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->a_key_addr, + msg->key_bytes); + + if (sqe->auth_ivin.a_ivin_addr && msg->mode == WD_DIGEST_HMAC && + msg->alg == WD_DIGEST_AES_GMAC) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->auth_ivin.a_ivin_addr, + MAX_IV_SIZE); +} + +static int fill_digest_bd3_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (msg->mm_ops->sva_mode) { + /* avoid HW accessing address 0 when the pointer is NULL */ + if (msg->in) + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + else + sqe->data_src_addr = (__u64)(uintptr_t)msg->out; + sqe->mac_addr = (__u64)(uintptr_t)msg->out; + if (msg->mode == WD_DIGEST_HMAC) + sqe->a_key_addr = (__u64)(uintptr_t)msg->key; + if (msg->mode == WD_DIGEST_HMAC && msg->alg == WD_DIGEST_AES_GMAC) + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->iv; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("digest failed to check memory type!\n"); + return -WD_EINVAL; + } + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + if (msg->in_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->data_src_addr = (__u64)(uintptr_t)phy_addr; + } + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->mac_addr = (__u64)(uintptr_t)phy_addr; + + if (msg->iv && msg->mode == WD_DIGEST_HMAC && + msg->alg == WD_DIGEST_AES_GMAC) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, MAX_IV_SIZE); + if (!phy_addr) + goto map_err; + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->key_bytes != 0 && msg->mode == WD_DIGEST_HMAC) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_digest_bd3_addr(msg, sqe); + return -WD_ENOMEM; +} + static int hisi_sec_digest_send_v3(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_digest_msg *msg = wd_msg; struct hisi_sec_sqe3 sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -1980,43 +2471,53 @@ static int hisi_sec_digest_send_v3(struct wd_alg_driver *drv, handle_t ctx, void sqe.auth_mac_key = AUTH_HMAC_CALCULATE; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl_v3(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = 
hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("digest failed to get sglpool for hw_v3!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl_v3(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; } sqe.a_len_key = (__u32)msg->in_bytes; - sqe.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe.mac_addr = (__u64)(uintptr_t)msg->out; + ret = fill_digest_bd3_addr(msg, &sqe); + if (ret < 0) { + WD_ERR("digest map memory is err(%d)!\n", ret); + goto put_sgl; + } ret = fill_digest_bd3_alg(msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; ret = fill_digest_long_hash3(h_qp, msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.tag = (__u64)(uintptr_t)msg->tag; - fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe); + fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe, + msg->mm_ops->sva_mode); ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (ret < 0) { if (ret != -WD_EBUSY) WD_ERR("digest send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_digest_bd3_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -2040,7 +2541,6 @@ static void parse_digest_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -2048,10 +2548,13 @@ static void parse_digest_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } + destroy_digest_bd3_addr(temp_msg, sqe); if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "digest"); @@ -2077,7 +2580,7 @@ static int hisi_sec_digest_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -2253,22 +2756,6 @@ static void set_aead_auth_iv(struct wd_aead_msg *msg) } } -static void fill_aead_bd2_addr(struct wd_aead_msg *msg, - struct hisi_sec_sqe *sqe) -{ - sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->type2.mac_addr = (__u64)(uintptr_t)msg->mac; - sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->ckey; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->akey; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - - /* CCM/GCM should init a_iv */ - set_aead_auth_iv(msg); - - sqe->type2.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; -} - static int aead_len_check(struct wd_aead_msg *msg, enum sec_bd_type type) { if (msg->msg_state == AEAD_MSG_MIDDLE) { @@ -2291,6 +2778,11 @@ static int aead_len_check(struct wd_aead_msg *msg, enum sec_bd_type type) return -WD_EINVAL; } + if (unlikely(msg->in_bytes == 0 && msg->assoc_bytes == 0)) { + WD_ERR("aead input data length is 0\n"); + return -WD_EINVAL; + } + if (unlikely(msg->cmode == WD_CIPHER_CCM && msg->assoc_bytes > MAX_CCM_AAD_LEN)) { WD_ERR("aead ccm aad length is too long, size = 
%u\n", @@ -2323,14 +2815,14 @@ static void gcm_auth_ivin(struct wd_aead_msg *msg) __u32 final_counter = GCM_FINAL_COUNTER; /* auth_ivin = {cipher_ivin(16B), null(16B), auth_mac(16B), null(16B)} */ - memset(msg->aiv_stream, 0, AIV_STREAM_LEN); + memset(msg->aiv, 0, AIV_STREAM_LEN); - memcpy(msg->aiv_stream, msg->iv, GCM_IV_SIZE); + memcpy(msg->aiv, msg->iv, GCM_IV_SIZE); /* The last 4 bytes of c_ivin are counters */ - memcpy(msg->aiv_stream + GCM_IV_SIZE, &final_counter, GCM_FINAL_COUNTER_LEN); + memcpy(msg->aiv + GCM_IV_SIZE, &final_counter, GCM_FINAL_COUNTER_LEN); /* Fill auth_ivin with the mac of last MIDDLE BD */ - memcpy(msg->aiv_stream + GCM_STREAM_MAC_OFFSET, msg->mac, GCM_FULL_MAC_LEN); + memcpy(msg->aiv + GCM_STREAM_MAC_OFFSET, msg->mac, GCM_FULL_MAC_LEN); /* Use the user's origin mac for decrypt icv check */ if (msg->op_type == WD_CIPHER_DECRYPTION_DIGEST) @@ -2353,8 +2845,6 @@ static void fill_gcm_first_bd2(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe sqe->type2.c_alg = 0; sqe->type2.auth_src_offset = 0; sqe->type2.alen_ivllen = msg->assoc_bytes; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void fill_gcm_middle_bd2(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe) @@ -2367,8 +2857,6 @@ static void fill_gcm_middle_bd2(struct wd_aead_msg *msg, struct hisi_sec_sqe *sq fill_gcm_akey_len(msg, sqe, BD_TYPE2); sqe->type2.alen_ivllen = 0; sqe->type2.a_ivin_addr = sqe->type2.mac_addr; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void get_galois_vector_s(struct wd_aead_msg *msg, __u8 *s) @@ -2385,7 +2873,7 @@ static void get_galois_vector_s(struct wd_aead_msg *msg, __u8 *s) /* Based the little-endian operation */ for (i = 0; i < GCM_BLOCK_SIZE; i++) - s[i] = a_c[i] ^ msg->aiv_stream[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; + s[i] = a_c[i] ^ msg->aiv[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; } static int gcm_do_soft_mac(struct wd_aead_msg *msg) @@ -2424,9 +2912,9 @@ static int gcm_do_soft_mac(struct wd_aead_msg *msg) */ for (i = 0; i < GCM_BLOCK_SIZE; i++) G[i] = data[GCM_BLOCK_OFFSET - i] ^ - msg->aiv_stream[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; + msg->aiv[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; - galois_compute(G, H, msg->aiv_stream + GCM_STREAM_MAC_OFFSET, GCM_BLOCK_SIZE); + galois_compute(G, H, msg->aiv + GCM_STREAM_MAC_OFFSET, GCM_BLOCK_SIZE); len -= block; offset += block; } @@ -2436,7 +2924,7 @@ static int gcm_do_soft_mac(struct wd_aead_msg *msg) galois_compute(S, H, g, GCM_BLOCK_SIZE); /* Encrypt ctr0 based on AES_ECB */ - aes_encrypt(msg->ckey, msg->ckey_bytes, msg->aiv_stream, ctr_r); + aes_encrypt(msg->ckey, msg->ckey_bytes, msg->aiv, ctr_r); /* Get the GMAC tag final */ for (i = 0; i < GCM_BLOCK_SIZE; i++) @@ -2556,11 +3044,159 @@ static int aead_msg_state_check(struct wd_aead_msg *msg) return 0; } +static void destroy_aead_bd2_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + __u64 dma_addr; + void *mempool; + + aead_free_aiv_addr(msg); + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("aead failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->type2.data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->type2.data_src_addr, + msg->in_bytes); + + if (sqe->type2.data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->type2.data_dst_addr, + msg->out_bytes); + + if 
(sqe->type2.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->type2.c_ivin_addr, + msg->iv_bytes); + + if (sqe->type2.a_key_addr) { + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE) + && msg->cmode == WD_CIPHER_GCM) + mm_ops->iova_unmap(mempool, msg->ckey, + (void *)(uintptr_t)sqe->type2.a_key_addr, + msg->ckey_bytes); + else + mm_ops->iova_unmap(mempool, msg->akey, + (void *)(uintptr_t)sqe->type2.a_key_addr, + msg->akey_bytes); + } + + if (sqe->type2.c_key_addr && !((msg->msg_state == AEAD_MSG_FIRST || + msg->msg_state == AEAD_MSG_MIDDLE) && msg->cmode == WD_CIPHER_GCM)) + mm_ops->iova_unmap(mempool, msg->ckey, (void *)(uintptr_t)sqe->type2.c_key_addr, + msg->ckey_bytes); + + if (sqe->type2.mac_addr) + mm_ops->iova_unmap(mempool, msg->mac, (void *)(uintptr_t)sqe->type2.mac_addr, + msg->auth_bytes); +} + +static int aead_mem_nosva_map(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe, int idx) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes + msg->assoc_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->type2.data_src_addr = (__u64)(uintptr_t)phy_addr; + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)phy_addr; + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->akey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->akey, msg->akey_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.a_key_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->ckey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->ckey, msg->ckey_bytes); + if (!phy_addr) + goto map_err; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE) + && msg->cmode == WD_CIPHER_GCM) + sqe->type2.a_key_addr = (__u64)(uintptr_t)phy_addr; + else + sqe->type2.c_key_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->auth_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->mac, msg->auth_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.mac_addr = (__u64)(uintptr_t)phy_addr; + } + + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + phy_addr = aiv_addr->aiv_nosva + (idx << AEAD_AIV_OFFSET); + sqe->type2.a_ivin_addr = (__u64)(uintptr_t)phy_addr; + + return 0; + +map_err: + destroy_aead_bd2_addr(msg, sqe); + return -WD_ENOMEM; +} + +static int fill_aead_bd2_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe, + struct hisi_qp *qp) +{ + int idx; + + idx = aead_get_aiv_addr(qp, msg); + if (idx < 0) + return idx; + + /* sva mode */ + if (msg->mm_ops->sva_mode) { + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.mac_addr = (__u64)(uintptr_t)msg->mac; + sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->ckey; + sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->akey; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + sqe->type2.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; + + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE) + && msg->cmode == WD_CIPHER_GCM) + sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->ckey; + + return 
0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("aead failed to check memory type!\n"); + aead_free_aiv_addr(msg); + return -WD_EINVAL; + } + + /* aiv addr is freed in destroy addr interface */ + return aead_mem_nosva_map(msg, sqe, idx); +} + static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_aead_msg *msg = wd_msg; struct hisi_sec_sqe sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -2583,17 +3219,27 @@ static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ return ret; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl(h_qp, &msg->in, &msg->out, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("aead failed to get sglpool for hw_v2!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; } - fill_aead_bd2_addr(msg, &sqe); + ret = fill_aead_bd2_addr(msg, &sqe, qp); + if (ret < 0) { + if (ret != -WD_EBUSY) + WD_ERR("aead map memory is err(%d)!\n", ret); + goto put_sgl; + } ret = fill_stream_bd2(msg, &sqe); if (unlikely(ret)) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.type2.tag = (__u16)msg->tag; @@ -2603,15 +3249,16 @@ static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ if (ret != -WD_EBUSY) WD_ERR("aead send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_aead_bd2_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -2652,8 +3299,6 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_AEAD; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->type2.data_dst_addr; temp_msg = wd_aead_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -2661,12 +3306,16 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } update_stream_counter(temp_msg); + destroy_aead_bd2_addr(temp_msg, sqe); if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "aead"); @@ -2692,7 +3341,7 @@ static int hisi_sec_aead_recv(struct wd_alg_driver *drv, handle_t ctx, void *wd_ if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -2789,23 +3438,6 @@ static int fill_aead_bd3_mode(struct wd_aead_msg *msg, return 0; } -static void fill_aead_bd3_addr(struct wd_aead_msg *msg, - struct hisi_sec_sqe3 *sqe) -{ - sqe->data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; - - sqe->mac_addr = (__u64)(uintptr_t)msg->mac; - sqe->c_key_addr = (__u64)(uintptr_t)msg->ckey; - sqe->a_key_addr = (__u64)(uintptr_t)msg->akey; - sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - - /* CCM/GCM should init a_iv */ - set_aead_auth_iv(msg); - - 
sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; -} - static void fill_gcm_first_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) { sqe->auth_mac_key |= AI_GEN_INNER << SEC_AI_GEN_OFFSET_V3; @@ -2822,8 +3454,6 @@ static void fill_gcm_first_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sq sqe->c_mode_alg &= ~(0x7 << SEC_CALG_OFFSET_V3); sqe->auth_src_offset = 0; sqe->a_len_key = msg->assoc_bytes; - sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void fill_gcm_middle_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -2837,8 +3467,6 @@ static void fill_gcm_middle_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *s fill_gcm_akey_len(msg, sqe, BD_TYPE3); sqe->a_len_key = 0; sqe->auth_ivin.a_ivin_addr = sqe->mac_addr; - sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void fill_gcm_final_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -2853,9 +3481,6 @@ static void fill_gcm_final_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sq sqe->a_len_key = 0; sqe->stream_scene.long_a_data_len = msg->assoc_bytes; sqe->stream_scene.long_a_data_len |= msg->long_data_len << LONG_AUTH_DATA_OFFSET; - sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; - sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->aiv_stream; } static int fill_stream_bd3(handle_t h_qp, struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -2939,12 +3564,167 @@ static int fill_aead_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) return 0; } +static void destroy_aead_bd3_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + aead_free_aiv_addr(msg); + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("aead failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->data_src_addr, + msg->in_bytes); + + if (sqe->data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->data_dst_addr, + msg->out_bytes); + + if (sqe->no_scene.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->no_scene.c_ivin_addr, + msg->iv_bytes); + else if (sqe->stream_scene.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, + (void *)(uintptr_t)sqe->stream_scene.c_ivin_addr, + msg->iv_bytes); + + if (sqe->a_key_addr) { + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) + mm_ops->iova_unmap(mempool, msg->ckey, (void *)(uintptr_t)sqe->a_key_addr, + msg->ckey_bytes); + else + mm_ops->iova_unmap(mempool, msg->akey, (void *)(uintptr_t)sqe->a_key_addr, + msg->akey_bytes); + } + + if (sqe->c_key_addr && !((msg->msg_state == AEAD_MSG_FIRST || + msg->msg_state == AEAD_MSG_MIDDLE || msg->msg_state == AEAD_MSG_END) && + msg->cmode == WD_CIPHER_GCM)) + mm_ops->iova_unmap(mempool, msg->ckey, (void *)(uintptr_t)sqe->c_key_addr, + msg->ckey_bytes); + + if (sqe->mac_addr) + mm_ops->iova_unmap(mempool, msg->mac, (void *)(uintptr_t)sqe->mac_addr, + msg->auth_bytes); +} + +static int aead_mem_nosva_map_v3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe, int idx) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + struct wd_mm_ops *mm_ops = msg->mm_ops; + 
void *mempool = mm_ops->usr; + void *phy_addr; + + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes + msg->assoc_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->data_src_addr = (__u64)(uintptr_t)phy_addr; + + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->data_dst_addr = (__u64)(uintptr_t)phy_addr; + + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) + sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + + if (msg->akey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->akey, msg->akey_bytes); + if (!phy_addr) + goto map_err; + sqe->a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + if (msg->ckey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->ckey, msg->ckey_bytes); + if (!phy_addr) + goto map_err; + sqe->c_key_addr = (__u64)(uintptr_t)phy_addr; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) + sqe->a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + if (msg->auth_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->mac, msg->auth_bytes); + if (!phy_addr) + goto map_err; + sqe->mac_addr = (__u64)(uintptr_t)phy_addr; + } + + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + phy_addr = aiv_addr->aiv_nosva + (idx << AEAD_AIV_OFFSET); + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)phy_addr; + + return 0; + +map_err: + destroy_aead_bd3_addr(msg, sqe); + return -WD_ENOMEM; +} + +static int fill_aead_bd3_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe, + struct hisi_qp *qp) +{ + int idx; + + idx = aead_get_aiv_addr(qp, msg); + if (idx < 0) + return idx; + + /* sva mode */ + if (msg->mm_ops->sva_mode) { + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->c_key_addr = (__u64)(uintptr_t)msg->ckey; + sqe->a_key_addr = (__u64)(uintptr_t)msg->akey; + sqe->mac_addr = (__u64)(uintptr_t)msg->mac; + + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) { + sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; + } + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("aead failed to check memory type!\n"); + aead_free_aiv_addr(msg); + return -WD_EINVAL; + } + + /* aiv addr is freed in destroy addr interface */ + return aead_mem_nosva_map_v3(msg, sqe, idx); +} + static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_aead_msg *msg = wd_msg; struct hisi_sec_sqe3 sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -2967,19 +3747,30 @@ static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void * return ret; fill_sec_prefetch(msg->data_fmt, msg->in_bytes + msg->assoc_bytes, - qp->q_info.hw_type, &sqe); + qp->q_info.hw_type, &sqe, msg->mm_ops->sva_mode); if (msg->data_fmt == WD_SGL_BUF) { - ret = 
hisi_sec_fill_sgl_v3(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("aead failed to get sglpool for hw_v3!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl_v3(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; } - fill_aead_bd3_addr(msg, &sqe); + ret = fill_aead_bd3_addr(msg, &sqe, qp); + if (ret < 0) { + if (ret != -WD_EBUSY) + WD_ERR("aead map memory is err(%d)!\n", ret); + goto put_sgl; + } + ret = fill_stream_bd3(h_qp, msg, &sqe); if (unlikely(ret)) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.tag = msg->tag; @@ -2988,15 +3779,16 @@ static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void * if (ret != -WD_EBUSY) WD_ERR("aead send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_aead_bd3_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -3023,8 +3815,6 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_AEAD; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->data_dst_addr; temp_msg = wd_aead_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -3032,12 +3822,16 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } update_stream_counter(temp_msg); + destroy_aead_bd3_addr(temp_msg, sqe); if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "aead"); @@ -3063,7 +3857,7 @@ static int hisi_sec_aead_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void * if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, - recv_msg->in, recv_msg->out); + recv_msg->in, recv_msg->out, recv_msg->mm_ops); return 0; } @@ -3128,6 +3922,7 @@ static void hisi_sec_exit(struct wd_alg_driver *drv) priv = (struct hisi_sec_ctx *)drv->priv; config = &priv->config; + for (i = 0; i < config->ctx_num; i++) { h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); hisi_qm_free_qp(h_qp); diff --git a/include/drv/wd_aead_drv.h b/include/drv/wd_aead_drv.h index a9c0e7c..2c53217 100644 --- a/include/drv/wd_aead_drv.h +++ b/include/drv/wd_aead_drv.h @@ -54,9 +54,7 @@ struct wd_aead_msg { /* input iv pointer */ __u8 *iv; /* input auth iv pointer */ - __u8 aiv[MAX_IV_SIZE]; - /* input auth iv pointer for stream mode */ - __u8 aiv_stream[AIV_STREAM_LEN]; + __u8 *aiv; /* input data pointer */ __u8 *in; /* output data pointer */ @@ -68,6 +66,25 @@ struct wd_aead_msg { /* total of data for stream mode */ __u64 long_data_len; enum wd_aead_msg_state msg_state; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; + void *drv_cfg; /* internal driver configuration */ +}; + +struct wd_aead_aiv_addr { + __u8 *aiv; + __u8 *aiv_status; + __u8 *aiv_nosva; +}; + +struct wd_aead_extend_ops { + void *params; + int (*eops_aiv_init)(struct wd_alg_driver *drv, + struct wd_mm_ops *mm_ops, + void **params); + void (*eops_aiv_uninit)(struct wd_alg_driver *drv, + struct 
wd_mm_ops *mm_ops, + void *params); }; struct wd_aead_msg *wd_aead_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_cipher_drv.h b/include/drv/wd_cipher_drv.h index c6d8ddf..c0be0c3 100644 --- a/include/drv/wd_cipher_drv.h +++ b/include/drv/wd_cipher_drv.h @@ -48,6 +48,8 @@ struct wd_cipher_msg { __u8 *in; /* output data pointer */ __u8 *out; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; }; struct wd_cipher_msg *wd_cipher_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_digest_drv.h b/include/drv/wd_digest_drv.h index a55ef5b..12398f2 100644 --- a/include/drv/wd_digest_drv.h +++ b/include/drv/wd_digest_drv.h @@ -59,6 +59,8 @@ struct wd_digest_msg { __u8 *partial_block; /* total of data for stream mode */ __u64 long_data_len; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; }; static inline enum hash_block_type get_hash_block_type(struct wd_digest_msg *msg) diff --git a/include/wd_aead.h b/include/wd_aead.h index 01f6980..4b5095f 100644 --- a/include/wd_aead.h +++ b/include/wd_aead.h @@ -41,6 +41,8 @@ struct wd_aead_sess_setup { enum wd_digest_type dalg; enum wd_digest_mode dmode; void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; /** diff --git a/include/wd_cipher.h b/include/wd_cipher.h index d54f7fe..1d82eac 100644 --- a/include/wd_cipher.h +++ b/include/wd_cipher.h @@ -73,6 +73,8 @@ struct wd_cipher_sess_setup { enum wd_cipher_alg alg; enum wd_cipher_mode mode; void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_cipher_req; diff --git a/include/wd_digest.h b/include/wd_digest.h index 6ce31f2..42a95db 100644 --- a/include/wd_digest.h +++ b/include/wd_digest.h @@ -100,6 +100,8 @@ struct wd_digest_sess_setup { enum wd_digest_type alg; enum wd_digest_mode mode; void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; typedef void *wd_digest_cb_t(void *cb_param); diff --git a/wd_aead.c b/wd_aead.c index 373b6fe..8467409 100644 --- a/wd_aead.c +++ b/wd_aead.c @@ -44,19 +44,22 @@ struct wd_aead_sess { enum wd_cipher_mode cmode; enum wd_digest_type dalg; enum wd_digest_mode dmode; - unsigned char ckey[MAX_CIPHER_KEY_SIZE]; - unsigned char akey[MAX_HMAC_KEY_SIZE]; + unsigned char *ckey; + unsigned char *akey; /* Mac data pointer for decrypto as stream mode */ - unsigned char mac_bak[WD_AEAD_CCM_GCM_MAX]; + unsigned char *mac_bak; __u16 ckey_bytes; __u16 akey_bytes; __u16 auth_bytes; void *priv; void *sched_key; /* Stored the counter for gcm stream mode */ - __u8 iv[MAX_IV_SIZE]; + __u8 *iv; /* Total of data for stream mode */ __u64 long_data_len; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; + struct wd_aead_extend_ops eops; }; struct wd_env_config wd_aead_env_config; @@ -302,26 +305,26 @@ int wd_aead_get_maxauthsize(handle_t h_sess) return g_aead_mac_len[sess->dalg]; } -handle_t wd_aead_alloc_sess(struct wd_aead_sess_setup *setup) +static struct wd_aead_sess *check_and_init_sess(struct wd_aead_sess_setup *setup) { - struct wd_aead_sess *sess = NULL; + struct wd_aead_sess *sess; bool ret; if (unlikely(!setup)) { WD_ERR("failed to check session input parameter!\n"); - return (handle_t)0; + return NULL; } if (setup->calg >= WD_CIPHER_ALG_TYPE_MAX || - setup->cmode >= WD_CIPHER_MODE_TYPE_MAX) { + setup->cmode >= WD_CIPHER_MODE_TYPE_MAX) { WD_ERR("failed to check algorithm setup!\n"); - return (handle_t)0; + return NULL; } sess = malloc(sizeof(struct wd_aead_sess)); if (!sess) { WD_ERR("failed to alloc session memory!\n"); - return (handle_t)0; + return NULL; } memset(sess, 
0, sizeof(struct wd_aead_sess)); @@ -330,24 +333,160 @@ handle_t wd_aead_alloc_sess(struct wd_aead_sess_setup *setup) sess->cmode = setup->cmode; sess->dalg = setup->dalg; sess->dmode = setup->dmode; + ret = wd_drv_alg_support(sess->alg_name, wd_aead_setting.driver); if (!ret) { WD_ERR("failed to support this algorithm: %s!\n", sess->alg_name); - goto err_sess; + free(sess); + return NULL; + } + + return sess; +} + +static int aead_setup_memory_and_buffers(struct wd_aead_sess *sess, + struct wd_aead_sess_setup *setup) +{ + wd_alloc aead_alloc_func; + wd_free aead_free_func; + void *mempool; + int ret; + + ret = wd_mem_ops_init(wd_aead_setting.config.ctxs[0].ctx, + &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + return -WD_EINVAL; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + + aead_alloc_func = sess->mm_ops.alloc; + aead_free_func = sess->mm_ops.free; + mempool = sess->mm_ops.usr; + + sess->mac_bak = aead_alloc_func(mempool, WD_AEAD_CCM_GCM_MAX); + if (!sess->mac_bak) { + WD_ERR("aead failed to calloc mac_bak memory!\n"); + return -WD_ENOMEM; + } + memset(sess->mac_bak, 0, WD_AEAD_CCM_GCM_MAX); + + sess->iv = aead_alloc_func(mempool, MAX_IV_SIZE); + if (!sess->iv) { + WD_ERR("failed to alloc iv memory!\n"); + goto iv_err; + } + memset(sess->iv, 0, MAX_IV_SIZE); + + sess->ckey = aead_alloc_func(mempool, MAX_CIPHER_KEY_SIZE); + if (!sess->ckey) { + WD_ERR("failed to alloc ckey memory!\n"); + goto ckey_err; + } + memset(sess->ckey, 0, MAX_CIPHER_KEY_SIZE); + + sess->akey = aead_alloc_func(mempool, MAX_HMAC_KEY_SIZE); + if (!sess->akey) { + WD_ERR("failed to alloc akey memory!\n"); + goto akey_err; + } + memset(sess->akey, 0, MAX_HMAC_KEY_SIZE); + + return 0; + +akey_err: + aead_free_func(mempool, sess->ckey); +ckey_err: + aead_free_func(mempool, sess->iv); +iv_err: + aead_free_func(mempool, sess->mac_bak); + + return -WD_ENOMEM; +} + +static void cleanup_session(struct wd_aead_sess *sess) +{ + sess->mm_ops.free(sess->mm_ops.usr, sess->mac_bak); + sess->mm_ops.free(sess->mm_ops.usr, sess->iv); + sess->mm_ops.free(sess->mm_ops.usr, sess->ckey); + sess->mm_ops.free(sess->mm_ops.usr, sess->akey); + + if (sess) + free(sess); +} + +static int wd_aead_sess_eops_init(struct wd_aead_sess *sess) +{ + int ret; + + if (sess->eops.eops_aiv_init) { + if (!sess->eops.eops_aiv_uninit) { + WD_ERR("failed to get aead extend ops free in session!\n"); + return -WD_EINVAL; + } + ret = sess->eops.eops_aiv_init(wd_aead_setting.driver, &sess->mm_ops, + &sess->eops.params); + if (ret) { + WD_ERR("failed to init aead extend ops params in session!\n"); + return ret; + } + } + + return WD_SUCCESS; +} + +static void wd_aead_sess_eops_uninit(struct wd_aead_sess *sess) +{ + if (sess->eops.eops_aiv_uninit) { + sess->eops.eops_aiv_uninit(wd_aead_setting.driver, &sess->mm_ops, + sess->eops.params); + sess->eops.params = NULL; + } +} + +handle_t wd_aead_alloc_sess(struct wd_aead_sess_setup *setup) +{ + struct wd_aead_sess *sess; + int ret; + + sess = check_and_init_sess(setup); + if (!sess) + return (handle_t)0; + + if (aead_setup_memory_and_buffers(sess, setup)) { + free(sess); + return (handle_t)0; + } + + if (wd_aead_setting.driver->get_extend_ops) { + ret = wd_aead_setting.driver->get_extend_ops(&sess->eops); + if (ret) { + WD_ERR("failed to get aead sess extend ops!\n"); + goto sess_err; + } + } + + ret = wd_aead_sess_eops_init(sess); + if (ret) { + WD_ERR("failed to init aead sess extend eops!\n"); + goto sess_err; } 
- /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_aead_setting.sched.sched_init( - wd_aead_setting.sched.h_sched_ctx, setup->sched_param); + wd_aead_setting.sched.h_sched_ctx, setup->sched_param); if (WD_IS_ERR(sess->sched_key)) { WD_ERR("failed to init session schedule key!\n"); - goto err_sess; + goto sched_key_err; } return (handle_t)sess; -err_sess: - free(sess); +sched_key_err: + wd_aead_sess_eops_uninit(sess); +sess_err: + cleanup_session(sess); return (handle_t)0; } @@ -365,7 +504,8 @@ void wd_aead_free_sess(handle_t h_sess) if (sess->sched_key) free(sess->sched_key); - free(sess); + wd_aead_sess_eops_uninit(sess); + cleanup_session(sess); } static int wd_aead_param_check(struct wd_aead_sess *sess, @@ -717,8 +857,11 @@ static void fill_request_msg(struct wd_aead_msg *msg, struct wd_aead_req *req, msg->mac = req->mac; msg->auth_bytes = sess->auth_bytes; msg->data_fmt = req->data_fmt; - msg->msg_state = req->msg_state; + + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; + msg->drv_cfg = sess->eops.params; fill_stream_msg(msg, req, sess); } diff --git a/wd_cipher.c b/wd_cipher.c index 92ca07b..58656dc 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -63,9 +63,11 @@ struct wd_cipher_sess { enum wd_cipher_mode mode; wd_dev_mask_t *dev_mask; void *priv; - unsigned char key[MAX_CIPHER_KEY_SIZE]; + unsigned char *key; __u32 key_bytes; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_env_config wd_cipher_env_config; @@ -250,6 +252,31 @@ int wd_cipher_set_key(handle_t h_sess, const __u8 *key, __u32 key_len) return 0; } +static int cipher_setup_memory_and_buffers(struct wd_cipher_sess *sess, + struct wd_cipher_sess_setup *setup) +{ + int ret; + + ret = wd_mem_ops_init(wd_cipher_setting.config.ctxs[0].ctx, + &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("cipher failed to init memory ops!\n"); + return ret; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + + sess->key = sess->mm_ops.alloc(sess->mm_ops.usr, MAX_CIPHER_KEY_SIZE); + if (!sess->key) { + WD_ERR("cipher failed to alloc key memory!\n"); + return -WD_ENOMEM; + } + memset(sess->key, 0, MAX_CIPHER_KEY_SIZE); + + return 0; +} + handle_t wd_cipher_alloc_sess(struct wd_cipher_sess_setup *setup) { struct wd_cipher_sess *sess = NULL; @@ -282,16 +309,22 @@ handle_t wd_cipher_alloc_sess(struct wd_cipher_sess_setup *setup) sess->alg = setup->alg; sess->mode = setup->mode; + /* Memory type set */ + if (cipher_setup_memory_and_buffers(sess, setup)) + goto free_sess; + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_cipher_setting.sched.sched_init( wd_cipher_setting.sched.h_sched_ctx, setup->sched_param); if (WD_IS_ERR(sess->sched_key)) { WD_ERR("failed to init session schedule key!\n"); - goto free_sess; + goto free_key; } return (handle_t)sess; +free_key: + sess->mm_ops.free(sess->mm_ops.usr, sess->key); free_sess: free(sess); return (handle_t)0; @@ -307,6 +340,7 @@ void wd_cipher_free_sess(handle_t h_sess) } wd_memset_zero(sess->key, sess->key_bytes); + sess->mm_ops.free(sess->mm_ops.usr, sess->key); if (sess->sched_key) free(sess->sched_key); @@ -545,6 +579,8 @@ static void fill_request_msg(struct wd_cipher_msg *msg, msg->iv = req->iv; msg->iv_bytes = req->iv_bytes; msg->data_fmt = req->data_fmt; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; } static int cipher_iv_len_check(struct wd_cipher_req *req, diff --git a/wd_digest.c 
b/wd_digest.c index 2d31176..0b37f8b 100644 --- a/wd_digest.c +++ b/wd_digest.c @@ -64,10 +64,12 @@ struct wd_digest_sess { enum wd_digest_type alg; enum wd_digest_mode mode; void *priv; - unsigned char key[MAX_HMAC_KEY_SIZE]; + unsigned char *key; __u32 key_bytes; void *sched_key; struct wd_digest_stream_data stream_data; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_env_config wd_digest_env_config; @@ -187,6 +189,31 @@ int wd_digest_set_key(handle_t h_sess, const __u8 *key, __u32 key_len) return 0; } +static int digest_setup_memory_and_buffers(struct wd_digest_sess *sess, + struct wd_digest_sess_setup *setup) +{ + int ret; + + ret = wd_mem_ops_init(wd_digest_setting.config.ctxs[0].ctx, + &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + return ret; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + + sess->key = sess->mm_ops.alloc(sess->mm_ops.usr, MAX_HMAC_KEY_SIZE); + if (!sess->key) { + WD_ERR("digest failed to alloc key memory!\n"); + return -WD_ENOMEM; + } + memset(sess->key, 0, MAX_HMAC_KEY_SIZE); + + return 0; +} + handle_t wd_digest_alloc_sess(struct wd_digest_sess_setup *setup) { struct wd_digest_sess *sess = NULL; @@ -215,16 +242,23 @@ handle_t wd_digest_alloc_sess(struct wd_digest_sess_setup *setup) WD_ERR("failed to support this algorithm: %s!\n", sess->alg_name); goto err_sess; } + + /* Memory type set */ + if (digest_setup_memory_and_buffers(sess, setup)) + goto err_sess; + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_digest_setting.sched.sched_init( wd_digest_setting.sched.h_sched_ctx, setup->sched_param); if (WD_IS_ERR(sess->sched_key)) { WD_ERR("failed to init session schedule key!\n"); - goto err_sess; + goto err_key; } return (handle_t)sess; +err_key: + sess->mm_ops.free(sess->mm_ops.usr, sess->key); err_sess: free(sess); return (handle_t)0; @@ -240,6 +274,7 @@ void wd_digest_free_sess(handle_t h_sess) } wd_memset_zero(sess->key, sess->key_bytes); + sess->mm_ops.free(sess->mm_ops.usr, sess->key); if (sess->sched_key) free(sess->sched_key); free(sess); @@ -603,6 +638,9 @@ static void fill_request_msg(struct wd_digest_msg *msg, msg->partial_block = sess->stream_data.partial_block; msg->partial_bytes = sess->stream_data.partial_bytes; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; + /* Use iv_bytes to store the stream message state */ msg->iv_bytes = sess->stream_data.msg_state; } -- 2.33.0
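Before the tooling patches below, a minimal sketch of the fill/destroy address pattern the SEC driver adopts above may be useful. It is a sketch only: the wd_mm_ops callback shapes are inferred from their call sites in this patch, and the demo_* names are illustrative rather than UADK API.

/*
 * Minimal sketch, assuming wd_mm_ops callback shapes inferred from
 * their call sites above; demo_* names are illustrative only.
 */
#include <stdint.h>
#include <stddef.h>

struct demo_mm_ops {
        void *(*iova_map)(void *usr, void *va, size_t size);
        void (*iova_unmap)(void *usr, void *va, void *dma, size_t size);
        void *usr;       /* memory pool handle */
        int sva_mode;    /* non-zero: device shares the CPU page tables */
};

/* SVA mode: the virtual address goes into the SQE unchanged.
 * No-SVA mode: translate through the pool and record the DMA address. */
static int demo_fill_hw_addr(struct demo_mm_ops *ops, void *buf,
                             size_t len, uint64_t *hw_addr)
{
        void *dma;

        if (ops->sva_mode) {
                *hw_addr = (uint64_t)(uintptr_t)buf;
                return 0;
        }

        dma = ops->iova_map(ops->usr, buf, len);
        if (!dma)
                return -1;      /* caller unwinds buffers mapped so far */

        *hw_addr = (uint64_t)(uintptr_t)dma;
        return 0;
}

/* Mirror of the fill step: only No-SVA mappings need tearing down,
 * which is why the destroy helpers above key off the SQE fields. */
static void demo_destroy_hw_addr(struct demo_mm_ops *ops, void *buf,
                                 size_t len, uint64_t hw_addr)
{
        if (ops->sva_mode || !hw_addr)
                return;

        ops->iova_unmap(ops->usr, buf, (void *)(uintptr_t)hw_addr, len);
}

Keeping both sides symmetric is what lets the recv path free No-SVA mappings using the DMA addresses still held in the completed SQE.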
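The session changes follow the same motive: inline key/IV arrays become pool allocations so that a No-SVA device can DMA them directly. Below is a hedged sketch of the allocate-or-unwind pattern, with alloc/free signatures assumed from their uses above.

#include <stddef.h>
#include <string.h>

typedef void *(*demo_alloc_fn)(void *usr, size_t size);
typedef void (*demo_free_fn)(void *usr, void *va);

struct demo_sess_bufs {
        unsigned char *key;
        unsigned char *iv;
};

/* Allocate every session buffer from the pool, unwinding in reverse
 * order on failure, as the aead/cipher/digest setup helpers above do. */
static int demo_sess_bufs_init(struct demo_sess_bufs *bufs,
                               demo_alloc_fn alloc_fn, demo_free_fn free_fn,
                               void *pool, size_t key_sz, size_t iv_sz)
{
        bufs->key = alloc_fn(pool, key_sz);
        if (!bufs->key)
                return -1;
        memset(bufs->key, 0, key_sz);

        bufs->iv = alloc_fn(pool, iv_sz);
        if (!bufs->iv)
                goto iv_err;
        memset(bufs->iv, 0, iv_sz);

        return 0;

iv_err:
        free_fn(pool, bufs->key);
        return -1;
}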
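One more sketch for the GCM stream path: gcm_auth_ivin() above rebuilds a 64-byte auth IV laid out as {c_ivin(16B) = iv(12B)||counter(4B), pad(16B), mac(16B), pad(16B)}. The demo_* constants below mirror the offsets used there and are assumptions for illustration; like the patch, this relies on a little-endian counter.

#include <string.h>

#define DEMO_GCM_IV_SIZE        12
#define DEMO_GCM_CTR_LEN        4
#define DEMO_STREAM_MAC_OFFSET  32
#define DEMO_AIV_STREAM_LEN     64
#define DEMO_GCM_MAC_LEN        16

/* Rebuild the stream-mode auth IV from the cipher IV, the final
 * counter and the MAC carried over from the last MIDDLE BD. */
static void demo_build_stream_aiv(unsigned char *aiv, const unsigned char *iv,
                                  const unsigned char *last_mac,
                                  unsigned int final_counter)
{
        memset(aiv, 0, DEMO_AIV_STREAM_LEN);
        memcpy(aiv, iv, DEMO_GCM_IV_SIZE);
        memcpy(aiv + DEMO_GCM_IV_SIZE, &final_counter, DEMO_GCM_CTR_LEN);
        memcpy(aiv + DEMO_STREAM_MAC_OFFSET, last_mac, DEMO_GCM_MAC_LEN);
}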
From: Zhushuai Yin <yinzhushuai@huawei.com> Provide a public tool interface that obtains the device ID from the device name. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/uadk_benchmark.c | 33 ++++++++++++++++++++++++++++ uadk_tool/benchmark/uadk_benchmark.h | 8 +++++++ 2 files changed, 41 insertions(+) diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index d2826ad..24737c5 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -200,6 +200,29 @@ void set_run_state(int state) g_run_state = state; } +int uadk_parse_dev_id(char *dev_name) +{ + char *last_dash = NULL; + char *endptr; + int dev_id; + + if (!dev_name) + return -WD_EINVAL; + + /* Find the last '-' in the string. */ + last_dash = strrchr(dev_name, '-'); + if (!last_dash || *(last_dash + 1) == '\0') + return -WD_EINVAL; + + /* Parse the number that follows it */ + dev_id = strtol(last_dash + 1, &endptr, 10); + /* Check that the suffix is all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + static int get_alg_type(const char *alg_name) { int alg = ALG_MAX; @@ -717,6 +740,7 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) {"complevel", required_argument, 0, 16}, {"init2", no_argument, 0, 17}, {"device", required_argument, 0, 18}, + {"memory", required_argument, 0, 19}, {0, 0, 0, 0} }; @@ -788,6 +812,9 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) } strcpy(option->device, optarg); break; + case 19: + option->mem_type = strtol(optarg, NULL, 0); + break; default: ACC_TST_PRT("invalid: bad input parameter!\n"); print_benchmark_help(); @@ -864,6 +891,12 @@ int acc_option_convert(struct acc_option *option) goto param_err; } + /* Check that the requested memory type is within the supported range */ + if (option->mem_type > UADK_PROXY) { + ACC_TST_PRT("uadk benchmark memory type set error!\n"); + goto param_err; + } + return 0; param_err: diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index b03db6e..81ace1b 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -81,6 +81,13 @@ struct acc_option { bool latency; u32 sched_type; int task_type; + int mem_type; }; + +enum uadk_mem_mode { + UADK_AUTO, // SVA or No-SVA + UADK_MANUAL, // No-SVA, user-managed memory + UADK_PROXY, // No-SVA, proxied by the UADK API }; enum acc_type { @@ -224,6 +231,7 @@ extern void cal_avg_latency(u32 count); extern int get_alg_name(int alg, char *alg_name); extern void segmentfault_handler(int sig); +int uadk_parse_dev_id(char *dev_name); int acc_cmd_parse(int argc, char *argv[], struct acc_option *option); int acc_default_case(struct acc_option *option); int acc_option_convert(struct acc_option *option); -- 2.33.0
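A short usage sketch for the helper added above; the device name "hisi_sec2-4" is illustrative, and the prototype is assumed to come from the benchmark header.

#include <stdio.h>

int uadk_parse_dev_id(char *dev_name);  /* from uadk_benchmark.h */

int main(void)
{
        char name[] = "hisi_sec2-4";    /* numeric suffix after the last '-' */
        int id = uadk_parse_dev_id(name);

        if (id < 0)
                printf("invalid device name\n");
        else
                printf("dev id = %d\n", id);    /* prints "dev id = 4" */

        return 0;
}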
From: Zhushuai Yin <yinzhushuai@huawei.com> The newly added tool interfaces support unified verification of the SEC SVA and No-SVA code paths. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/sec_uadk_benchmark.c | 527 ++++++++++++++++++++++- 1 file changed, 518 insertions(+), 9 deletions(-) diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index cbd2a99..141b161 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -8,6 +8,7 @@ #include "include/wd_digest.h" #include "include/wd_aead.h" #include "include/wd_sched.h" +#include "include/wd_bmm.h" #define SEC_TST_PRT printf #define MAX_IVK_LENTH 64 @@ -17,6 +18,8 @@ #define SEC_MAX_MAC_LEN 64 #define SEC_SAVE_FILE_LEN 64 #define SEC_PERF_AUTH_SIZE 16 +#define SQE_SIZE 128 +#define SEC_OP_TYPE_MAX 2 char aead_key[] = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"; @@ -28,7 +31,7 @@ char g_save_mac[SEC_MAX_MAC_LEN]; struct uadk_bd { u8 *src; u8 *dst; - u8 mac[SEC_MAX_MAC_LEN]; + u8 *mac; }; struct bd_pool { @@ -40,6 +43,7 @@ struct thread_pool { u8 **iv; u8 **key; u8 **hash; + void *rsv_pool; } g_uadk_pool; typedef struct uadk_thread_res { @@ -54,6 +58,7 @@ typedef struct uadk_thread_res { u32 dalg; u32 dmode; u32 d_outbytes; + int mm_type; } thread_data; static struct wd_ctx_config g_ctx_cfg; @@ -66,6 +71,7 @@ static unsigned int g_alg; static unsigned int g_algtype; static unsigned int g_optype; static unsigned int g_maclen; +static unsigned int g_dev_id; struct aead_alg_info { int index; @@ -635,6 +641,7 @@ static int specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); wd_free_list_accels(list); return 0; @@ -677,6 +684,7 @@ static int non_specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); free(dev); } @@ -718,15 +726,31 @@ static int init_ctx_config(struct acc_option *options) goto free_ctxs; } + switch(subtype) { case CIPHER_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_cipher_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, + max_node, wd_cipher_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, + max_node, wd_cipher_poll_ctx); break; case AEAD_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_aead_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, + max_node, wd_aead_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, + max_node, wd_aead_poll_ctx); break; case DIGEST_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_digest_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, + max_node, wd_digest_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, + max_node, wd_digest_poll_ctx); break; default: SEC_TST_PRT("failed to parse alg subtype!\n"); @@ -743,6 +767,7 @@ static int init_ctx_config(struct acc_option *options) param.mode = mode; param.begin = 0; param.end = g_ctxnum - 1; + param.dev_id = g_dev_id; ret = wd_sched_rr_instance(g_sched, &param);
if (ret) { SEC_TST_PRT("failed to fill sched data!\n"); @@ -867,7 +892,10 @@ static int init_ctx_config2(struct acc_option *options) /* init */ switch(subtype) { case CIPHER_TYPE: - ret = wd_cipher_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_cipher_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + ret = wd_cipher_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); if (ret) SEC_TST_PRT("failed to do cipher init2!\n"); break; @@ -877,12 +905,18 @@ SEC_TST_PRT("failed to do cipher intruction init2!\n"); break; case AEAD_TYPE: - ret = wd_aead_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_aead_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + ret = wd_aead_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); if (ret) SEC_TST_PRT("failed to do aead init2!\n"); break; case DIGEST_TYPE: - ret = wd_digest_init2_(alg_name, options->sched_type, options->task_type, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_digest_init2_(alg_name, SCHED_POLICY_RR, options->task_type, &cparams); + else + ret = wd_digest_init2_(alg_name, SCHED_POLICY_DEV, options->task_type, &cparams); if (ret) SEC_TST_PRT("failed to do digest init2!\n"); break; @@ -1071,6 +1105,10 @@ static int init_uadk_bd_pool(void) memset(g_uadk_pool.pool[i].bds[j].dst, 0, step); if (!g_uadk_pool.pool[i].bds[j].dst) goto malloc_error3; + g_uadk_pool.pool[i].bds[j].mac = malloc(SEC_MAX_MAC_LEN); + if (!g_uadk_pool.pool[i].bds[j].mac) + goto malloc_error4; + memset(g_uadk_pool.pool[i].bds[j].mac, 0, SEC_MAX_MAC_LEN); if (g_alg != AEAD_TYPE) { get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); @@ -1093,18 +1131,22 @@ return 0; +malloc_error4: + free(g_uadk_pool.pool[i].bds[j].dst); malloc_error3: free(g_uadk_pool.pool[i].bds[j].src); malloc_error2: for (j--; j >= 0; j--) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + free(g_uadk_pool.pool[i].bds[j].mac); } malloc_error1: for (i--; i >= 0; i--) { for (j = 0; j < MAX_POOL_LENTH; j++) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + free(g_uadk_pool.pool[i].bds[j].mac); } free(g_uadk_pool.pool[i].bds); g_uadk_pool.pool[i].bds = NULL; @@ -1133,6 +1175,7 @@ static void free_uadk_bd_pool(void) for (j = 0; j < MAX_POOL_LENTH; j++) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + free(g_uadk_pool.pool[i].bds[j].mac); } } free(g_uadk_pool.pool[i].bds); @@ -1144,6 +1187,322 @@ static void free_uadk_bd_pool(void) free_ivkey_source(); } +static void free_rsv_ivkey(void *pool) +{ + int i; + + /* release the hash block for each thread */ + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.hash[i]) + wd_mem_free(pool, g_uadk_pool.hash[i]); + } + + /* release the key block for each thread */ + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.key[i]) + wd_mem_free(pool, g_uadk_pool.key[i]); + } + + /* release the IV block for each thread */ + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.iv[i]) + wd_mem_free(pool, g_uadk_pool.iv[i]); + } + + /* release the pointer arrays */ + free(g_uadk_pool.hash); + free(g_uadk_pool.key); + free(g_uadk_pool.iv); + + /* the memory pool is managed externally */ +} + +static int init_rsv_ivkey(handle_t h_ctx, void *pool) +{ + int i, j, m, idx; + + /* Check if the incoming memory pool is valid */ + if (!pool) {
SEC_TST_PRT("Invalid pool parameter\n"); + return -EINVAL; + } + + /* + * Allocate a pointer array + * (still using malloc, as the structure is small) + */ + g_uadk_pool.iv = malloc(g_thread_num * sizeof(char *)); + if (!g_uadk_pool.iv) { + SEC_TST_PRT("Failed to alloc IV pointers\n"); + goto error; + } + memset(g_uadk_pool.iv, 0, g_thread_num * sizeof(char *)); + + g_uadk_pool.key = malloc(g_thread_num * sizeof(char *)); + if (!g_uadk_pool.key) { + SEC_TST_PRT("Failed to alloc Key pointers\n"); + goto free_iv_pointers; + } + memset(g_uadk_pool.key, 0, g_thread_num * sizeof(char *)); + + g_uadk_pool.hash = malloc(g_thread_num * sizeof(char *)); + if (!g_uadk_pool.hash) { + SEC_TST_PRT("Failed to alloc Hash pointers\n"); + goto free_key_pointers; + } + memset(g_uadk_pool.hash, 0, g_thread_num * sizeof(char *)); + + /* Allocate IV blocks for each thread */ + for (i = 0; i < g_thread_num; i++) { + g_uadk_pool.iv[i] = wd_mem_alloc(pool, g_pktlen); + if (!g_uadk_pool.iv[i]) { + SEC_TST_PRT("Failed to alloc IV block\n"); + goto free_iv_blocks; + } + memset(g_uadk_pool.iv[i], 0, MAX_IVK_LENTH); + } + + /* Allocate KEY blocks for each thread */ + for (j = 0; j < g_thread_num; j++) { + g_uadk_pool.key[j] = wd_mem_alloc(pool, g_pktlen); + if (!g_uadk_pool.key[j]) { + SEC_TST_PRT("Failed to alloc Key block\n"); + goto free_key_blocks; + } + memcpy(g_uadk_pool.key[j], aead_key, SEC_PERF_KEY_LEN); + } + + /* Allocate HASH blocks for each thread */ + for (m = 0; m < g_thread_num; m++) { + g_uadk_pool.hash[m] = wd_mem_alloc(pool, g_pktlen); + if (!g_uadk_pool.hash[m]) { + SEC_TST_PRT("Failed to alloc Hash block\n"); + goto free_hash_blocks; + } + memcpy(g_uadk_pool.hash[m], aead_key, SEC_PERF_KEY_LEN); + } + + return 0; + +free_hash_blocks: + for (idx = m - 1; idx >= 0; idx--) { + if (g_uadk_pool.hash[idx]) + wd_mem_free(pool, g_uadk_pool.hash[idx]); + } +free_key_blocks: + for (idx = j - 1; idx >= 0; idx--) { + if (g_uadk_pool.key[idx]) + wd_mem_free(pool, g_uadk_pool.key[idx]); + } +free_iv_blocks: + for (idx = i - 1; idx >= 0; idx--) { + if (g_uadk_pool.iv[idx]) + wd_mem_free(pool, g_uadk_pool.iv[idx]); + } + free(g_uadk_pool.hash); +free_key_pointers: + free(g_uadk_pool.key); +free_iv_pointers: + free(g_uadk_pool.iv); +error: + return -ENOMEM; +} + +static int init_uadk_rsv_pool(struct acc_option *option) +{ + struct wd_mempool_setup pool_setup; + char *alg = option->algclass; + unsigned long step, len; + /* + * Assuming that h_ctx exists globally, + * it needs to be obtained based on actual conditions. + */ + handle_t h_ctx; + int i, j; + int ret; + + h_ctx = wd_find_ctx(alg); + if (!h_ctx) { + SEC_TST_PRT("Failed to find a ctx for alg:%s\n", option->algname); + return -EINVAL; + } + g_ctx_cfg.priv = (void *)h_ctx; + + if (g_alg != AEAD_TYPE) + step = sizeof(char) * g_pktlen; + else + step = sizeof(char) * g_pktlen * 2; + + /* Create a memory pool for managing memory blocks of src and dst */ + pool_setup.block_size = step; + /* + * Each thread requires two blocks (src and dst) for each uadk_bd, + * along with the IV, KEY, and MAC sections, + * thus necessitating five times the data. 
+ */ + pool_setup.block_num = g_thread_num * MAX_POOL_LENTH * 8; + pool_setup.align_size = SQE_SIZE; + pool_setup.ops.alloc = NULL; + pool_setup.ops.free = NULL; + + g_uadk_pool.rsv_pool = wd_mempool_alloc(h_ctx, &pool_setup); + if (!g_uadk_pool.rsv_pool) { + SEC_TST_PRT("Failed to create block pool\n"); + return -ENOMEM; + } + + /* Initialize the memory for iv and key */ + ret = init_rsv_ivkey(h_ctx, g_uadk_pool.rsv_pool); + if (ret) { + SEC_TST_PRT("init uadk ivkey resource failed!\n"); + goto free_pool; + } + + if (g_alg != AEAD_TYPE) + len = sizeof(char) * g_pktlen; + else + len = sizeof(char) * g_pktlen * 2; + /* + * Allocate thread pool structure + * (still using malloc, as the structure is small) + */ + g_uadk_pool.pool = calloc(1, g_thread_num * sizeof(struct bd_pool)); + if (!g_uadk_pool.pool) { + SEC_TST_PRT("init uadk pool alloc thread failed!\n"); + goto free_ivkey; + } + + for (i = 0; i < g_thread_num; i++) { + g_uadk_pool.pool[i].bds = calloc(1, MAX_POOL_LENTH * sizeof(struct uadk_bd)); + if (!g_uadk_pool.pool[i].bds) { + SEC_TST_PRT("init uadk bds alloc failed!\n"); + goto malloc_error1; + } + + for (j = 0; j < MAX_POOL_LENTH; j++) { + /* Allocate memory blocks for src and dst from the memory pool */ + g_uadk_pool.pool[i].bds[j].src = wd_mem_alloc(g_uadk_pool.rsv_pool, len); + if (!g_uadk_pool.pool[i].bds[j].src) { + SEC_TST_PRT("Failed to alloc src block\n"); + goto malloc_error3; + } + + g_uadk_pool.pool[i].bds[j].dst = wd_mem_alloc(g_uadk_pool.rsv_pool, len); + if (!g_uadk_pool.pool[i].bds[j].dst) { + SEC_TST_PRT("Failed to alloc dst block\n"); + goto malloc_error3; + } + + g_uadk_pool.pool[i].bds[j].mac = wd_mem_alloc(g_uadk_pool.rsv_pool, SEC_MAX_MAC_LEN); + if (!g_uadk_pool.pool[i].bds[j].mac) { + SEC_TST_PRT("Failed to alloc mac block\n"); + goto malloc_error3; + } + + memset(g_uadk_pool.pool[i].bds[j].src, 0, len); + memset(g_uadk_pool.pool[i].bds[j].dst, 0, len); + memset(g_uadk_pool.pool[i].bds[j].mac, 0, step); + + if (g_alg != AEAD_TYPE) { + get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); + if (g_prefetch) + get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); + } else { + if (!g_optype) + get_aead_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen + SEC_AEAD_LEN); + else { + read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen + SEC_AEAD_LEN); + memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); + } + } + } + } + SEC_TST_PRT("Init uadk rsv block pool OK.\n"); + + return 0; + +malloc_error3: + /* Release the allocated src and dst blocks */ + if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); + goto malloc_error2; + +malloc_error2: + for (j--; j >= 0; j--) { + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); + if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + } +malloc_error1: + for (i--; i >= 0; i--) { + for (j = 0; j < MAX_POOL_LENTH; j++) { + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); 
+ if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + } + free(g_uadk_pool.pool[i].bds); + g_uadk_pool.pool[i].bds = NULL; + } + free(g_uadk_pool.pool); + g_uadk_pool.pool = NULL; + +free_ivkey: + free_rsv_ivkey(g_uadk_pool.rsv_pool); + +free_pool: + wd_mempool_free(h_ctx, g_uadk_pool.rsv_pool); + g_uadk_pool.rsv_pool = NULL; + + SEC_TST_PRT("init uadk bd pool alloc failed!\n"); + return -ENOMEM; +} + +static void free_uadk_rsv_pool(struct acc_option *option) +{ + handle_t h_ctx = (handle_t)g_ctx_cfg.priv; + int i, j; + + /* save aad + ctext + mac */ + if (g_alg == AEAD_TYPE && !g_optype) + save_aead_dst_data(g_uadk_pool.pool[0].bds[0].dst, + g_pktlen + SEC_AEAD_LEN); + + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.pool[i].bds) { + for (j = 0; j < MAX_POOL_LENTH; j++) { + /* Use wd_mem_free to release the block */ + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); + if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + } + } + free(g_uadk_pool.pool[i].bds); + g_uadk_pool.pool[i].bds = NULL; + } + free(g_uadk_pool.pool); + g_uadk_pool.pool = NULL; + + /* Destroy IV and key */ + free_rsv_ivkey(g_uadk_pool.rsv_pool); + + /* Destroy memory pool */ + if (g_uadk_pool.rsv_pool) + wd_mempool_free(h_ctx, g_uadk_pool.rsv_pool); + g_uadk_pool.rsv_pool = NULL; +} /*-------------------------------uadk benchmark main code-------------------------------------*/ static void *sec_uadk_poll(void *data) @@ -1245,6 +1604,7 @@ static void *sec_uadk_cipher_async(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_cipher_sess_setup cipher_setup = {0}; + struct sched_params sc_param = {0}; struct wd_cipher_req creq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1265,6 +1625,20 @@ static void *sec_uadk_cipher_async(void *arg) cipher_setup.alg = pdata->alg; cipher_setup.mode = pdata->mode; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + cipher_setup.sched_param = (void *)&sc_param; + + cipher_setup.mm_type = pdata->mm_type; + cipher_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + cipher_setup.mm_ops.alloc = (void *)wd_mem_alloc; + cipher_setup.mm_ops.free = (void *)wd_mem_free; + cipher_setup.mm_ops.iova_map = (void *)wd_mem_map; + cipher_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + cipher_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_cipher_alloc_sess(&cipher_setup); if (!h_sess) return NULL; @@ -1305,6 +1679,22 @@ static void *sec_uadk_cipher_async(void *arg) } count++; } + + /* Release memory after all tasks are complete. */ + if (count) { + i = 0; + while (get_recv_time() != g_ctxnum) { + if (i++ >= MAX_TRY_CNT) { + SEC_TST_PRT("failed to wait poll thread finish!\n"); + break; + } + + usleep(SEND_USLEEP); + } + } + /* Wait for the device to complete the tasks. 
*/ + usleep(SEND_USLEEP * MAX_TRY_CNT); + wd_cipher_free_sess(h_sess); add_send_complete(); @@ -1316,6 +1706,7 @@ static void *sec_uadk_aead_async(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_aead_sess_setup aead_setup = {0}; + struct sched_params sc_param = {0}; u8 *priv_iv, *priv_key, *priv_hash; u32 auth_size = SEC_PERF_AUTH_SIZE; struct wd_aead_req areq = {0}; @@ -1338,6 +1729,21 @@ static void *sec_uadk_aead_async(void *arg) aead_setup.calg = pdata->alg; aead_setup.cmode = pdata->mode; + aead_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + aead_setup.sched_param = (void *)&sc_param; + + aead_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + aead_setup.mm_ops.alloc = (void *)wd_mem_alloc; + aead_setup.mm_ops.free = (void *)wd_mem_free; + aead_setup.mm_ops.iova_map = (void *)wd_mem_map; + aead_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + aead_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; + if (pdata->is_union) { aead_setup.dalg = pdata->dalg; aead_setup.dmode = pdata->dmode; @@ -1412,6 +1818,22 @@ static void *sec_uadk_aead_async(void *arg) } count++; } + + /* Release memory after all tasks are complete. */ + if (count) { + i = 0; + while (get_recv_time() != g_ctxnum) { + if (i++ >= MAX_TRY_CNT) { + SEC_TST_PRT("failed to wait poll thread finish!\n"); + break; + } + + usleep(SEND_USLEEP); + } + } + /* Wait for the device to complete the tasks. */ + usleep(SEND_USLEEP * MAX_TRY_CNT); + wd_aead_free_sess(h_sess); add_send_complete(); @@ -1423,6 +1845,7 @@ static void *sec_uadk_digest_async(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_digest_sess_setup digest_setup = {0}; + struct sched_params sc_param = {0}; struct wd_digest_req dreq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1443,6 +1866,21 @@ static void *sec_uadk_digest_async(void *arg) digest_setup.alg = pdata->alg; digest_setup.mode = pdata->mode; // digest mode is optype + digest_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + digest_setup.sched_param = (void *)&sc_param; + + digest_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + digest_setup.mm_ops.alloc = (void *)wd_mem_alloc; + digest_setup.mm_ops.free = (void *)wd_mem_free; + digest_setup.mm_ops.iova_map = (void *)wd_mem_map; + digest_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + digest_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; + h_sess = wd_digest_alloc_sess(&digest_setup); if (!h_sess) return NULL; @@ -1482,6 +1920,22 @@ static void *sec_uadk_digest_async(void *arg) } count++; } + + /* Release memory after all tasks are complete. */ + if (count) { + i = 0; + while (get_recv_time() != g_ctxnum) { + if (i++ >= MAX_TRY_CNT) { + SEC_TST_PRT("failed to wait poll thread finish!\n"); + break; + } + + usleep(SEND_USLEEP); + } + } + /* Wait for the device to complete the tasks. 
*/ + usleep(SEND_USLEEP * MAX_TRY_CNT); + wd_digest_free_sess(h_sess); add_send_complete(); @@ -1493,6 +1947,7 @@ static void *sec_uadk_cipher_sync(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_cipher_sess_setup cipher_setup = {0}; + struct sched_params sc_param = {0}; struct wd_cipher_req creq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1512,6 +1967,20 @@ static void *sec_uadk_cipher_sync(void *arg) cipher_setup.alg = pdata->alg; cipher_setup.mode = pdata->mode; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + cipher_setup.sched_param = (void *)&sc_param; + + cipher_setup.mm_type = pdata->mm_type; + cipher_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + cipher_setup.mm_ops.alloc = (void *)wd_mem_alloc; + cipher_setup.mm_ops.free = (void *)wd_mem_free; + cipher_setup.mm_ops.iova_map = (void *)wd_mem_map; + cipher_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + cipher_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_cipher_alloc_sess(&cipher_setup); if (!h_sess) return NULL; @@ -1554,6 +2023,7 @@ static void *sec_uadk_aead_sync(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_aead_sess_setup aead_setup = {0}; + struct sched_params sc_param = {0}; u8 *priv_iv, *priv_key, *priv_hash; u32 auth_size = SEC_PERF_AUTH_SIZE; struct wd_aead_req areq = {0}; @@ -1576,6 +2046,20 @@ static void *sec_uadk_aead_sync(void *arg) aead_setup.calg = pdata->alg; aead_setup.cmode = pdata->mode; + aead_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + aead_setup.sched_param = (void *)&sc_param; + + aead_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + aead_setup.mm_ops.alloc = (void *)wd_mem_alloc; + aead_setup.mm_ops.free = (void *)wd_mem_free; + aead_setup.mm_ops.iova_map = (void *)wd_mem_map; + aead_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + aead_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; if (pdata->is_union) { aead_setup.dalg = pdata->dalg; aead_setup.dmode = pdata->dmode; @@ -1644,6 +2128,7 @@ static void *sec_uadk_digest_sync(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_digest_sess_setup digest_setup = {0}; + struct sched_params sc_param = {0}; struct wd_digest_req dreq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1663,6 +2148,20 @@ static void *sec_uadk_digest_sync(void *arg) digest_setup.alg = pdata->alg; digest_setup.mode = pdata->mode; // digest mode is optype + digest_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + digest_setup.sched_param = (void *)&sc_param; + + digest_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + digest_setup.mm_ops.alloc = (void *)wd_mem_alloc; + digest_setup.mm_ops.free = (void *)wd_mem_free; + digest_setup.mm_ops.iova_map = (void *)wd_mem_map; + digest_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + digest_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_digest_alloc_sess(&digest_setup); if (!h_sess) return NULL; @@ -1741,6 +2240,7 @@ int sec_uadk_sync_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].d_outbytes = threads_option.d_outbytes; + threads_args[i].mm_type = options->mem_type; ret 
= pthread_create(&tdid[i], NULL, uadk_sec_sync_run, &threads_args[i]); if (ret) { SEC_TST_PRT("Create sync thread fail!\n"); @@ -1813,6 +2313,7 @@ int sec_uadk_async_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].d_outbytes = threads_option.d_outbytes; + threads_args[i].mm_type = options->mem_type; ret = pthread_create(&tdid[i], NULL, uadk_sec_async_run, &threads_args[i]); if (ret) { SEC_TST_PRT("Create async thread fail!\n"); @@ -1875,7 +2376,11 @@ int sec_uadk_benchmark(struct acc_option *options) if (ret) return ret; - ret = init_uadk_bd_pool(); + if (options->mem_type == UADK_AUTO) // SVA memory + ret = init_uadk_bd_pool(); + else + ret = init_uadk_rsv_pool(options); // In the test scenario, the user uses the uadk interface to apply for memory + if (ret) return ret; @@ -1889,7 +2394,11 @@ int sec_uadk_benchmark(struct acc_option *options) if (ret) return ret; - free_uadk_bd_pool(); + if (options->mem_type == UADK_AUTO) + free_uadk_bd_pool(); + else + free_uadk_rsv_pool(options); + if (options->inittype == INIT2_TYPE) uninit_ctx_config2(options->subtype); else -- 2.33.0
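For readers tracking the benchmark patch above: in the No-SVA case one reserved-memory pool is created per run, and every src/dst/mac block is carved out of it. A minimal sketch of that lifecycle, using only the wd_bmm.h calls the patch itself exercises (the helper name and the omitted error unwinding are illustrative, not part of the patch):

#include "include/wd_bmm.h"

/* Condensed reserved-pool setup, mirroring init_uadk_rsv_pool() above. */
static void *rsv_pool_create(struct acc_option *option, handle_t *h_ctx)
{
        struct wd_mempool_setup pool_setup = {0};

        /* Bind the pool to a queue reserved for this algorithm class. */
        *h_ctx = wd_find_ctx(option->algclass);
        if (!*h_ctx)
                return NULL;

        pool_setup.block_size = g_pktlen * 2;   /* worst case: AEAD src/dst */
        pool_setup.block_num = g_thread_num * MAX_POOL_LENTH * 8;
        pool_setup.align_size = SQE_SIZE;

        return wd_mempool_alloc(*h_ctx, &pool_setup);
}

Blocks then come from wd_mem_alloc(pool, len) and go back through wd_mem_free(pool, blk); wd_mempool_free(h_ctx, pool) destroys the pool once every block has been returned, mirroring free_uadk_rsv_pool().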
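The six mm_ops assignments are repeated verbatim in each of the cipher/aead/digest sync and async threads above; they could be folded into one helper. A sketch (the helper name is hypothetical, the assignments are exactly those from the patch):

/* Hypothetical helper deduplicating the mm_ops setup from the patch. */
static void fill_rsv_mm_ops(struct wd_mm_ops *mm_ops, void *rsv_pool)
{
        mm_ops->usr = rsv_pool;
        mm_ops->alloc = (void *)wd_mem_alloc;
        mm_ops->free = (void *)wd_mem_free;
        mm_ops->iova_map = (void *)wd_mem_map;
        mm_ops->iova_unmap = (void *)wd_mem_unmap;
        mm_ops->get_bufsize = (void *)wd_get_bufsize;
}

Each session setup would then reduce to fill_rsv_mm_ops(&setup.mm_ops, g_uadk_pool.rsv_pool) plus the mm_type and sched_param assignments, with sc_param.dev_id still taken from wd_get_dev_id(g_uadk_pool.rsv_pool).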
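The hpre patch that follows routes every hardware address through a small per-request mapping cache, so that any failure after the first iova_map() can be unwound before the request is dropped. Reduced to its core, the send-side pattern looks like this (a sketch against the helpers the patch adds, not a verbatim excerpt):

/* Sketch of the per-request mapping cache used by the rsa/dh/ecc send paths. */
static int hpre_send_sketch(struct wd_mm_ops *mm_ops, handle_t h_qp,
                            struct hisi_hpre_sqe *sqe,
                            void *in, size_t ilen, void *out, size_t olen)
{
        struct map_info_cache cache = {0};
        int ret = -WD_ENOMEM;
        __u16 send_cnt = 0;
        uintptr_t dma;

        /* SVA: DMA address == VA; No-SVA: iova_map() and remember the pair. */
        dma = select_addr_by_sva_mode(mm_ops, in, ilen, &cache);
        if (!dma)
                goto unmap;
        fill_hw_msg_addr(HW_MSG_IN, sqe, dma);

        dma = select_addr_by_sva_mode(mm_ops, out, olen, &cache);
        if (!dma)
                goto unmap;
        fill_hw_msg_addr(HW_MSG_OUT, sqe, dma);

        ret = hisi_qm_send(h_qp, sqe, 1, &send_cnt);
        if (!ret)
                return WD_SUCCESS;

unmap:
        /* Undo whatever was cached so far; a no-op in SVA mode. */
        unmap_addr_in_cache(mm_ops, &cache);
        return ret;
}

On the receive side no such bookkeeping is needed: the completed sqe carries the message pointer in its tag fields, and unsetup_hw_msg_addr() recovers and unmaps each DMA address from the sqe itself.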
From: lizhi <lizhi206@huawei.com> The hpre v2 interface must support the No-SVA mode within the new framework, so that users who switch to the v2 interface can run in both SVA and No-SVA modes. Signed-off-by: lizhi <lizhi206@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_hpre.c | 650 ++++++++++++++++++++++++++++----------- drv/hisi_qm_udrv.c | 15 +- include/drv/wd_dh_drv.h | 3 + include/drv/wd_ecc_drv.h | 3 + include/drv/wd_rsa_drv.h | 3 + include/wd_dh.h | 2 + include/wd_ecc.h | 2 + include/wd_rsa.h | 2 + wd_dh.c | 22 +- wd_ecc.c | 53 ++-- wd_rsa.c | 56 +++- 11 files changed, 578 insertions(+), 233 deletions(-) diff --git a/drv/hisi_hpre.c b/drv/hisi_hpre.c index 7ce842b..8a6d71c 100644 --- a/drv/hisi_hpre.c +++ b/drv/hisi_hpre.c @@ -34,14 +34,17 @@ #define SM2_PONIT_SIZE 64 #define MAX_HASH_LENS BITS_TO_BYTES(521) #define HW_PLAINTEXT_BYTES_MAX BITS_TO_BYTES(4096) -#define HPRE_CTX_Q_NUM_DEF 1 +#define HPRE_CTX_Q_NUM_DEF 1 +#define MAP_PAIR_NUM_MAX 6 #define CRT_PARAMS_SZ(key_size) ((5 * (key_size)) >> 1) #define CRT_GEN_PARAMS_SZ(key_size) ((7 * (key_size)) >> 1) #define GEN_PARAMS_SZ(key_size) ((key_size) << 1) #define CRT_PARAM_SZ(key_size) ((key_size) >> 1) -#define WD_TRANS_FAIL 0 +#define GEN_PARAMS_SZ_UL(key_size) ((unsigned long)(key_size) << 1) +#define DMA_ADDR(hi, lo) ((__u64)(((__u64)(hi) << 32) | (__u64)(lo))) +#define WD_TRANS_FAIL 0 #define CURVE_PARAM_NUM 6 #define SECP256R1_KEY_SIZE 32 @@ -78,6 +81,21 @@ enum hpre_alg_name { WD_ECC }; +enum hpre_hw_msg_field { + HW_MSG_IN, + HW_MSG_OUT, + HW_MSG_KEY, +}; + +struct map_info_cache { + struct map_pair { + void *addr; + uintptr_t pa; + size_t size; + } pairs[MAP_PAIR_NUM_MAX]; + size_t cnt; +}; + /* put vendor hardware message as a user interface is not suitable here */ struct hisi_hpre_sqe { __u32 alg : 5; @@ -113,12 +131,122 @@ struct hisi_hpre_sqe { struct hisi_hpre_ctx { struct wd_ctx_config_internal config; + struct wd_mm_ops *mm_ops; + handle_t rsv_mem_ctx; }; struct hpre_ecc_ctx { __u32 enable_hpcore; }; +static void add_map_info(struct map_info_cache *cache, void *addr, uintptr_t dma, size_t size) +{ + /* The cnt is guaranteed not to exceed MAP_PAIR_NUM_MAX within hpre. */ + cache->pairs[cache->cnt].addr = addr; + cache->pairs[cache->cnt].pa = dma; + cache->pairs[cache->cnt].size = size; + cache->cnt++; +} + +static void unmap_addr_in_cache(struct wd_mm_ops *mm_ops, struct map_info_cache *cache) +{ + size_t i; + + if (mm_ops->sva_mode) + return; + + /* The cnt is guaranteed not to exceed MAP_PAIR_NUM_MAX within hpre.
*/ + for (i = 0; i < cache->cnt; i++) + mm_ops->iova_unmap(mm_ops->usr, cache->pairs[i].addr, + cache->pairs[i].pa, cache->pairs[i].size); +} + +static void unsetup_hw_msg_addr(struct wd_mm_ops *mm_ops, enum hpre_hw_msg_field t_type, + struct hisi_hpre_sqe *hw_msg, void *va, size_t data_sz) +{ + void *addr; + + if (!mm_ops || mm_ops->sva_mode || !va || !data_sz) + return; + + switch (t_type) { + case HW_MSG_KEY: + addr = VA_ADDR(hw_msg->hi_key, hw_msg->low_key); + break; + case HW_MSG_IN: + addr = VA_ADDR(hw_msg->hi_in, hw_msg->low_in); + break; + case HW_MSG_OUT: + addr = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); + break; + default: + return; + } + + if (!addr) + return; + + mm_ops->iova_unmap(mm_ops->usr, va, (void *)addr, data_sz); +} + +static uintptr_t select_addr_by_sva_mode(struct wd_mm_ops *mm_ops, void *data, + size_t data_sz, struct map_info_cache *cache) +{ + uintptr_t addr; + + if (!mm_ops->sva_mode) { + addr = (uintptr_t)mm_ops->iova_map(mm_ops->usr, data, data_sz); + if (!addr) { + WD_ERR("Failed to get mapped DMA address for hardware.\n"); + return 0; + } + add_map_info(cache, data, addr, data_sz); + } else { + addr = (uintptr_t)data; + } + + return addr; +} + +static void fill_hw_msg_addr(enum hpre_hw_msg_field t_type, struct hisi_hpre_sqe *hw_msg, + uintptr_t addr) +{ + switch (t_type) { + case HW_MSG_KEY: + hw_msg->low_key = LW_U32(addr); + hw_msg->hi_key = HI_U32(addr); + break; + case HW_MSG_IN: + hw_msg->low_in = LW_U32(addr); + hw_msg->hi_in = HI_U32(addr); + break; + case HW_MSG_OUT: + hw_msg->low_out = LW_U32(addr); + hw_msg->hi_out = HI_U32(addr); + break; + default: + return; + } +} + +static int check_hpre_mem_params(struct wd_mm_ops *mm_ops, enum wd_mem_type mm_type) +{ + if (!mm_ops) { + WD_ERR("Memory operation functions are null.\n"); + return -WD_EINVAL; + } + + if (mm_type == UADK_MEM_AUTO && !mm_ops->sva_mode) { + WD_ERR("No-SVA in auto mode is not supported yet.\n"); + return -WD_EINVAL; + } else if (mm_type > UADK_MEM_PROXY) { + WD_ERR("failed to check memory type.\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + static void dump_hpre_msg(void *msg, int alg) { struct wd_rsa_msg *rsa_msg; @@ -269,7 +397,8 @@ static int fill_rsa_crt_prikey2(struct wd_rsa_prikey *prikey, *data = wd_dq->data; - return WD_SUCCESS; + return (int)(wd_dq->bsize + wd_qinv->bsize + wd_p->bsize + + wd_q->bsize + wd_dp->bsize); } static int fill_rsa_prikey1(struct wd_rsa_prikey *prikey, void **data) @@ -292,7 +421,7 @@ static int fill_rsa_prikey1(struct wd_rsa_prikey *prikey, void **data) *data = wd_d->data; - return WD_SUCCESS; + return (int)(wd_n->bsize + wd_d->bsize); } static int fill_rsa_pubkey(struct wd_rsa_pubkey *pubkey, void **data) @@ -315,7 +444,7 @@ static int fill_rsa_pubkey(struct wd_rsa_pubkey *pubkey, void **data) *data = wd_e->data; - return WD_SUCCESS; + return (int)(wd_n->bsize + wd_e->bsize); } static int fill_rsa_genkey_in(struct wd_rsa_kg_in *genkey) @@ -378,22 +507,21 @@ static int rsa_out_transfer(struct wd_rsa_msg *msg, { struct wd_rsa_req *req = &msg->req; struct wd_rsa_kg_out *key = req->dst; + struct wd_rsa_msg *target_msg; __u16 kbytes = msg->key_bytes; struct wd_dtb qinv, dq, dp; struct wd_dtb d, n; - void *data; int ret; - if (hw_msg->alg == HPRE_ALG_KG_CRT || hw_msg->alg == HPRE_ALG_KG_STD) { - /* async */ - if (qp_mode == CTX_MODE_ASYNC) { - data = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - key = container_of(data, struct wd_rsa_kg_out, data); - } else { - key = req->dst; - } + target_msg = (struct wd_rsa_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!target_msg) { + WD_ERR("failed to get correct rsa send msg from hardware!\n"); + return -WD_ADDR_ERR; } + if (hw_msg->alg == HPRE_ALG_KG_CRT || hw_msg->alg == HPRE_ALG_KG_STD) + key = target_msg->req.dst; + msg->result = WD_SUCCESS; if (hw_msg->alg == HPRE_ALG_KG_CRT) { req->dst_bytes = CRT_GEN_PARAMS_SZ(kbytes); @@ -424,37 +552,38 @@ return WD_SUCCESS; } -static int rsa_prepare_key(struct wd_rsa_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int rsa_prepare_key(struct wd_rsa_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { struct wd_rsa_req *req = &msg->req; + uintptr_t addr; + int ret, len; void *data; - int ret; if (req->op_type == WD_RSA_SIGN) { if (hw_msg->alg == HPRE_ALG_NC_CRT) { - ret = fill_rsa_crt_prikey2((void *)msg->key, &data); - if (ret) - return ret; + len = fill_rsa_crt_prikey2((void *)msg->key, &data); + if (len <= 0) + return -WD_EINVAL; } else { - ret = fill_rsa_prikey1((void *)msg->key, &data); - if (ret) - return ret; + len = fill_rsa_prikey1((void *)msg->key, &data); + if (len <= 0) + return -WD_EINVAL; hw_msg->alg = HPRE_ALG_NC_NCRT; } } else if (req->op_type == WD_RSA_VERIFY) { - ret = fill_rsa_pubkey((void *)msg->key, &data); - if (ret) - return ret; + len = fill_rsa_pubkey((void *)msg->key, &data); + if (len <= 0) + return -WD_EINVAL; hw_msg->alg = HPRE_ALG_NC_NCRT; } else if (req->op_type == WD_RSA_GENKEY) { ret = fill_rsa_genkey_in((void *)msg->key); if (ret) return ret; - ret = wd_rsa_kg_in_data((void *)msg->key, (char **)&data); - if (ret < 0) { + len = wd_rsa_kg_in_data((void *)msg->key, (char **)&data); + if (len < 0) { WD_ERR("failed to get rsa gen key data!\n"); - return ret; + return -WD_EINVAL; } if (hw_msg->alg == HPRE_ALG_NC_CRT) hw_msg->alg = HPRE_ALG_KG_CRT; @@ -465,36 +594,53 @@ static int rsa_prepare_key(struct wd_rsa_msg *msg, return -WD_EINVAL; } - hw_msg->low_key = LW_U32((uintptr_t)data); - hw_msg->hi_key = HI_U32((uintptr_t)data); + addr = select_addr_by_sva_mode(msg->mm_ops, data, len, cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_KEY, hw_msg, addr); return WD_SUCCESS; } -static int rsa_prepare_iot(struct wd_rsa_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int rsa_prepare_iot(struct wd_rsa_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { struct wd_rsa_req *req = &msg->req; struct wd_rsa_kg_out *kout = req->dst; int ret = WD_SUCCESS; - void *out = NULL; + uintptr_t phy, out; if (req->op_type != WD_RSA_GENKEY) { - hw_msg->low_in = LW_U32((uintptr_t)req->src); - hw_msg->hi_in = HI_U32((uintptr_t)req->src); - out = req->dst; + phy = select_addr_by_sva_mode(msg->mm_ops, req->src, req->src_bytes, cache); + if (!phy) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_IN, hw_msg, phy); + phy = select_addr_by_sva_mode(msg->mm_ops, req->dst, req->dst_bytes, cache); + if (!phy) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_OUT, hw_msg, phy); } else { hw_msg->low_in = 0; hw_msg->hi_in = 0; ret = wd_rsa_kg_out_data(kout, (char **)&out); if (ret < 0) return ret; - } - hw_msg->low_out = LW_U32((uintptr_t)out); - hw_msg->hi_out = HI_U32((uintptr_t)out); + if (!msg->mm_ops->sva_mode) { + phy = (uintptr_t)msg->mm_ops->iova_map(msg->mm_ops->usr, kout, ret); + if (!phy) { + WD_ERR("Failed to get DMA address for rsa output!\n"); + return -WD_ENOMEM; + } + add_map_info(cache, kout, phy, ret); + out = phy + out - (uintptr_t)kout; + } + + hw_msg->low_out = LW_U32(out); + hw_msg->hi_out =
HI_U32(out); + } - return ret; + return WD_SUCCESS; } static int hpre_init_qm_priv(struct wd_ctx_config_internal *config, @@ -615,10 +761,17 @@ static int rsa_send(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_rsa_msg *msg = rsa_msg; + struct map_info_cache cache = {0}; struct hisi_hpre_sqe hw_msg; __u16 send_cnt = 0; int ret; + ret = check_hpre_mem_params(msg->mm_ops, msg->mm_type); + if (ret) { + WD_ERR("rsa memory params are invalid, ret is %d!\n", ret); + return ret; + } + memset(&hw_msg, 0, sizeof(struct hisi_hpre_sqe)); if (msg->key_type == WD_RSA_PRIKEY1 || @@ -631,21 +784,30 @@ static int rsa_send(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) hw_msg.task_len1 = msg->key_bytes / BYTE_BITS - 0x1; - ret = rsa_prepare_key(msg, &hw_msg); - if (ret < 0) - return ret; + ret = rsa_prepare_key(msg, &hw_msg, &cache); + if (ret) + goto rsa_fail; /* prepare in/out put */ - ret = rsa_prepare_iot(msg, &hw_msg); - if (ret < 0) - return ret; + ret = rsa_prepare_iot(msg, &hw_msg, &cache); + if (ret) + goto rsa_fail; hisi_set_msg_id(h_qp, &msg->tag); hw_msg.done = 0x1; hw_msg.etype = 0x0; - hw_msg.low_tag = msg->tag; + hw_msg.low_tag = LW_U32((uintptr_t)msg); + hw_msg.hi_tag = HI_U32((uintptr_t)msg); - return hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + ret = hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + if (unlikely(ret)) + goto rsa_fail; + + return ret; + +rsa_fail: + unmap_addr_in_cache(msg->mm_ops, &cache); + return ret; } static void hpre_result_check(struct hisi_hpre_sqe *hw_msg, @@ -671,32 +833,29 @@ static int rsa_recv(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; - struct hisi_hpre_sqe hw_msg = {0}; + struct wd_rsa_msg *target_msg; struct wd_rsa_msg *msg = rsa_msg; - struct wd_rsa_msg *temp_msg; + struct hisi_hpre_sqe hw_msg = {0}; + size_t ilen = 0, olen = 0; __u16 recv_cnt = 0; + __u16 kbytes; int ret; ret = hisi_qm_recv(h_qp, &hw_msg, 1, &recv_cnt); if (ret < 0) return ret; - ret = hisi_check_bd_id(h_qp, msg->tag, hw_msg.low_tag); + target_msg = (struct wd_rsa_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + + ret = hisi_check_bd_id(h_qp, msg->tag, target_msg->tag); if (ret) return ret; - msg->tag = LW_U16(hw_msg.low_tag); - if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { - temp_msg = wd_rsa_get_msg(qp->q_info.idx, msg->tag); - if (!temp_msg) { - WD_ERR("failed to get send msg!
idx = %u, tag = %u.\n", - qp->q_info.idx, msg->tag); - return -WD_EINVAL; - } - } else { - temp_msg = msg; - } - + msg->tag = target_msg->tag; hpre_result_check(&hw_msg, &msg->result); if (!msg->result) { ret = rsa_out_transfer(msg, &hw_msg, qp->q_info.qp_mode); @@ -707,15 +866,36 @@ static int rsa_recv(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) } if (unlikely(msg->result != WD_SUCCESS)) - dump_hpre_msg(temp_msg, WD_RSA); + dump_hpre_msg(target_msg, WD_RSA); + + if (!target_msg->mm_ops->sva_mode) { + kbytes = target_msg->key_bytes; + if (hw_msg.alg == HPRE_ALG_KG_CRT) { + olen = CRT_GEN_PARAMS_SZ(kbytes); + ilen = GEN_PARAMS_SZ_UL(kbytes); + } else if (hw_msg.alg == HPRE_ALG_KG_STD) { + olen = GEN_PARAMS_SZ_UL(kbytes); + ilen = GEN_PARAMS_SZ_UL(kbytes); + } else { + olen = kbytes; + ilen = kbytes; + } + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_IN, &hw_msg, + target_msg->req.src, ilen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_OUT, &hw_msg, + target_msg->req.dst, olen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_KEY, &hw_msg, + target_msg->key, target_msg->key_bytes); + } return WD_SUCCESS; } -static int fill_dh_xp_params(struct wd_dh_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int fill_dh_xp_params(struct wd_dh_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { struct wd_dh_req *req = &msg->req; + uintptr_t addr; void *x, *p; int ret; @@ -735,26 +915,30 @@ static int fill_dh_xp_params(struct wd_dh_msg *msg, return ret; } - hw_msg->low_key = LW_U32((uintptr_t)x); - hw_msg->hi_key = HI_U32((uintptr_t)x); + addr = select_addr_by_sva_mode(msg->mm_ops, x, GEN_PARAMS_SZ_UL(msg->key_bytes), cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_KEY, hw_msg, addr); return WD_SUCCESS; } -static int dh_out_transfer(struct wd_dh_msg *msg, - struct hisi_hpre_sqe *hw_msg, __u8 qp_mode) +static int dh_out_transfer(struct wd_dh_msg *msg, struct hisi_hpre_sqe *hw_msg, + __u8 qp_mode) { __u16 key_bytes = (hw_msg->task_len1 + 1) * BYTE_BITS; struct wd_dh_req *req = &msg->req; + struct wd_dh_msg *target_msg; void *out; int ret; - /* async */ - if (qp_mode == CTX_MODE_ASYNC) - out = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - else - out = req->pri; + target_msg = (struct wd_dh_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + out = target_msg->req.pri; ret = hpre_bin_to_crypto_bin((char *)out, (const char *)out, key_bytes, "dh out"); if (!ret) @@ -768,12 +952,20 @@ static int dh_out_transfer(struct wd_dh_msg *msg, static int dh_send(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct map_info_cache cache = {0}; struct wd_dh_msg *msg = dh_msg; struct wd_dh_req *req = &msg->req; struct hisi_hpre_sqe hw_msg; __u16 send_cnt = 0; + uintptr_t addr; int ret; + ret = check_hpre_mem_params(msg->mm_ops, msg->mm_type); + if (ret) { + WD_ERR("dh memory params are invalid, ret is %d!\n", ret); + return ret; + } + memset(&hw_msg, 0, sizeof(struct hisi_hpre_sqe)); if (msg->is_g2 && req->op_type != WD_DH_PHASE2) @@ -791,32 +983,50 @@ static int dh_send(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) WD_ERR("failed to transfer dh g para format to hpre bin!\n"); return ret; } - - hw_msg.low_in = LW_U32((uintptr_t)msg->g); - hw_msg.hi_in = HI_U32((uintptr_t)msg->g); + addr = select_addr_by_sva_mode(msg->mm_ops, msg->g, msg->key_bytes, &cache); + if (!addr) + return
-WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_IN, &hw_msg, addr); } - ret = fill_dh_xp_params(msg, &hw_msg); - if (ret) - return ret; + ret = fill_dh_xp_params(msg, &hw_msg, &cache); + if (ret) { + WD_ERR("failed to fill dh x or p para!\n"); + goto dh_fail; + } hisi_set_msg_id(h_qp, &msg->tag); - hw_msg.low_out = LW_U32((uintptr_t)req->pri); - hw_msg.hi_out = HI_U32((uintptr_t)req->pri); hw_msg.done = 0x1; hw_msg.etype = 0x0; - hw_msg.low_tag = msg->tag; - return hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + hw_msg.low_tag = LW_U32((uintptr_t)msg); + hw_msg.hi_tag = HI_U32((uintptr_t)msg); + + addr = select_addr_by_sva_mode(msg->mm_ops, req->pri, req->pri_bytes, &cache); + if (!addr) { + ret = -WD_ENOMEM; + goto dh_fail; + } + fill_hw_msg_addr(HW_MSG_OUT, &hw_msg, addr); + + ret = hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + if (unlikely(ret)) + goto dh_fail; + + return ret; + +dh_fail: + unmap_addr_in_cache(msg->mm_ops, &cache); + return ret; } static int dh_recv(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; - struct wd_dh_msg *msg = dh_msg; struct hisi_hpre_sqe hw_msg = {0}; - struct wd_dh_msg *temp_msg; + struct wd_dh_msg *msg = dh_msg; + struct wd_dh_msg *target_msg; __u16 recv_cnt = 0; int ret; @@ -824,22 +1034,17 @@ static int dh_recv(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) if (ret < 0) return ret; - ret = hisi_check_bd_id(h_qp, msg->tag, hw_msg.low_tag); + target_msg = (struct wd_dh_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + + ret = hisi_check_bd_id(h_qp, msg->tag, target_msg->tag); if (ret) return ret; - msg->tag = LW_U16(hw_msg.low_tag); - if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { - temp_msg = wd_dh_get_msg(qp->q_info.idx, msg->tag); - if (!temp_msg) { - WD_ERR("failed to get send msg! 
idx = %u, tag = %u.\n", - qp->q_info.idx, msg->tag); - return -WD_EINVAL; - } - } else { - temp_msg = msg; - } - + msg->tag = target_msg->tag; hpre_result_check(&hw_msg, &msg->result); if (!msg->result) { ret = dh_out_transfer(msg, &hw_msg, qp->q_info.qp_mode); @@ -850,7 +1055,16 @@ static int dh_recv(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) } if (unlikely(msg->result != WD_SUCCESS)) - dump_hpre_msg(temp_msg, WD_DH); + dump_hpre_msg(target_msg, WD_DH); + + if (!target_msg->mm_ops->sva_mode) { + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_OUT, &hw_msg, + target_msg->req.pri, target_msg->req.pri_bytes); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_KEY, &hw_msg, + target_msg->req.x_p, GEN_PARAMS_SZ_UL(target_msg->key_bytes)); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_IN, &hw_msg, + target_msg->g, target_msg->key_bytes); + } return WD_SUCCESS; } @@ -1081,30 +1295,49 @@ static bool is_prikey_used(__u8 op_type) op_type == HPRE_SM2_DEC; } -static int ecc_prepare_key(struct wd_ecc_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static __u32 ecc_get_prikey_size(struct wd_ecc_msg *msg) +{ + if (msg->req.op_type == WD_SM2_SIGN || + msg->req.op_type == WD_ECDSA_SIGN || + msg->req.op_type == WD_SM2_DECRYPT) + return ECC_PRIKEY_SZ(msg->key_bytes); + else if (msg->curve_id == WD_X25519 || + msg->curve_id == WD_X448) + return X_DH_HW_KEY_SZ(msg->key_bytes); + else + return ECDH_HW_KEY_SZ(msg->key_bytes); +} + +static int ecc_prepare_key(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { void *data = NULL; + uintptr_t addr; + size_t ksz; int ret; if (is_prikey_used(msg->req.op_type)) { + ksz = ecc_get_prikey_size(msg); ret = ecc_prepare_prikey((void *)msg->key, &data, msg->curve_id); if (ret) return ret; } else { + ksz = ECC_PUBKEY_SZ(msg->key_bytes); ret = ecc_prepare_pubkey((void *)msg->key, &data); if (ret) return ret; } - hw_msg->low_key = LW_U32((uintptr_t)data); - hw_msg->hi_key = HI_U32((uintptr_t)data); + addr = select_addr_by_sva_mode(msg->mm_ops, data, ksz, cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_KEY, hw_msg, addr); - return WD_SUCCESS; + return ret; } -static void ecc_get_io_len(__u32 atype, __u32 hsz, size_t *ilen, - size_t *olen) +static void ecc_get_io_len(__u32 atype, __u32 hsz, + size_t *ilen, size_t *olen) { if (atype == HPRE_ALG_ECDH_MULTIPLY) { *olen = ECDH_OUT_PARAMS_SZ(hsz); @@ -1467,12 +1700,12 @@ static int ecc_prepare_out(struct wd_ecc_msg *msg, void **data) } /* prepare in/out hw message */ -static int ecc_prepare_iot(struct wd_ecc_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int ecc_prepare_iot(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { + size_t i_sz, o_sz; void *data = NULL; - size_t i_sz = 0; - size_t o_sz = 0; + uintptr_t addr; __u16 kbytes; int ret; @@ -1483,8 +1716,11 @@ static int ecc_prepare_iot(struct wd_ecc_msg *msg, WD_ERR("failed to prepare ecc in!\n"); return ret; } - hw_msg->low_in = LW_U32((uintptr_t)data); - hw_msg->hi_in = HI_U32((uintptr_t)data); + + addr = select_addr_by_sva_mode(msg->mm_ops, data, i_sz, cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_IN, hw_msg, addr); ret = ecc_prepare_out(msg, &data); if (ret) { @@ -1496,8 +1732,10 @@ static int ecc_prepare_iot(struct wd_ecc_msg *msg, if (!data) return WD_SUCCESS; - hw_msg->low_out = LW_U32((uintptr_t)data); - hw_msg->hi_out = HI_U32((uintptr_t)data); + addr = select_addr_by_sva_mode(msg->mm_ops, data, o_sz, cache); + if (!addr) + return -WD_ENOMEM; + 
fill_hw_msg_addr(HW_MSG_OUT, hw_msg, addr); return WD_SUCCESS; } @@ -1613,7 +1851,7 @@ static struct wd_ecc_out *create_ecdh_out(struct wd_ecc_msg *msg) return NULL; } - out = malloc(len); + out = msg->mm_ops->alloc(msg->mm_ops->usr, len); if (!out) { WD_ERR("failed to alloc out memory, sz = %u!\n", len); return NULL; @@ -1677,7 +1915,7 @@ static struct wd_ecc_msg *create_req(struct wd_ecc_msg *src, __u8 req_idx) prikey = (struct wd_ecc_prikey *)(ecc_key + 1); ecc_key->prikey = prikey; - prikey->data = malloc(ECC_PRIKEY_SZ(src->key_bytes)); + prikey->data = src->mm_ops->alloc(src->mm_ops->usr, ECC_PRIKEY_SZ(src->key_bytes)); if (unlikely(!prikey->data)) { WD_ERR("failed to alloc prikey data!\n"); goto fail_alloc_key_data; @@ -1696,7 +1934,7 @@ static struct wd_ecc_msg *create_req(struct wd_ecc_msg *src, __u8 req_idx) return dst; fail_set_prikey: - free(prikey->data); + src->mm_ops->free(src->mm_ops->usr, prikey->data); fail_alloc_key_data: free(ecc_key); fail_alloc_key: @@ -1709,9 +1947,9 @@ static void free_req(struct wd_ecc_msg *msg) { struct wd_ecc_key *key = (void *)msg->key; - free(key->prikey->data); + msg->mm_ops->free(msg->mm_ops->usr, key->prikey->data); free(key); - free(msg->req.dst); + msg->mm_ops->free(msg->mm_ops->usr, msg->req.dst); free(msg); } @@ -1732,7 +1970,8 @@ static int split_req(struct wd_ecc_msg *src, struct wd_ecc_msg **dst) return WD_SUCCESS; } -static int ecc_fill(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) +static int ecc_fill(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { __u32 hw_sz = get_hw_keysz(msg->key_bytes); __u8 op_type = msg->req.op_type; @@ -1757,39 +1996,42 @@ static int ecc_fill(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) return ret; /* prepare key */ - ret = ecc_prepare_key(msg, hw_msg); + ret = ecc_prepare_key(msg, hw_msg, cache); if (ret) return ret; /* prepare in/out put */ - ret = ecc_prepare_iot(msg, hw_msg); + ret = ecc_prepare_iot(msg, hw_msg, cache); if (ret) return ret; hw_msg->done = 0x1; hw_msg->etype = 0x0; - hw_msg->low_tag = msg->tag; + + hw_msg->low_tag = LW_U32((uintptr_t)msg); + hw_msg->hi_tag = HI_U32((uintptr_t)msg); hw_msg->task_len1 = hw_sz / BYTE_BITS - 0x1; return ret; } -static int ecc_general_send(handle_t ctx, struct wd_ecc_msg *msg) +static int ecc_general_send(handle_t ctx, struct wd_ecc_msg *msg, + struct map_info_cache *cache) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_hpre_sqe hw_msg; __u16 send_cnt = 0; int ret; - ret = ecc_fill(msg, &hw_msg); + ret = ecc_fill(msg, &hw_msg, cache); if (ret) return ret; return hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); } - -static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg) +static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg, + struct map_info_cache *cache) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_sm2_enc_in *ein = msg->req.src; @@ -1801,7 +2043,7 @@ static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg) if (ein->plaintext.dsize <= HW_PLAINTEXT_BYTES_MAX && hash->type == WD_HASH_SM3) - return ecc_general_send(ctx, msg); + return ecc_general_send(ctx, msg, cache); if (unlikely(!ein->k_set)) { WD_ERR("invalid: k not set!\n"); @@ -1824,13 +2066,13 @@ static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg) return ret; } - ret = ecc_fill(msg_dst[0], &hw_msg[0]); + ret = ecc_fill(msg_dst[0], &hw_msg[0], cache); if (unlikely(ret)) { WD_ERR("failed to fill 1th sqe, ret = %d!\n", ret); goto fail_fill_sqe; } - ret = ecc_fill(msg_dst[1], &hw_msg[1]); + ret = 
ecc_fill(msg_dst[1], &hw_msg[1], cache); if (unlikely(ret)) { WD_ERR("failed to fill 2th sqe, ret = %d!\n", ret); goto fail_fill_sqe; @@ -1855,7 +2097,8 @@ fail_fill_sqe: return ret; } -static int sm2_dec_send(handle_t ctx, struct wd_ecc_msg *msg) +static int sm2_dec_send(handle_t ctx, struct wd_ecc_msg *msg, + struct map_info_cache *cache) { struct wd_sm2_dec_in *din = (void *)msg->req.src; struct wd_hash_mt *hash = &msg->hash; @@ -1865,7 +2108,7 @@ /* c2 data lens <= 4096 bit */ if (din->c2.dsize <= BITS_TO_BYTES(4096) && hash->type == WD_HASH_SM3) - return ecc_general_send(ctx, msg); + return ecc_general_send(ctx, msg, cache); if (unlikely(!hash->cb || hash->type >= WD_HASH_MAX)) { WD_ERR("invalid: input hash type %u is error!\n", hash->type); @@ -1891,14 +2134,14 @@ goto free_dst; } - ret = ecc_general_send(ctx, dst); + ret = ecc_general_send(ctx, dst, cache); if (unlikely(ret)) goto free_req_dst; return ret; free_req_dst: - free(dst->req.dst); + msg->mm_ops->free(msg->mm_ops->usr, dst->req.dst); free_dst: free(dst); return ret; @@ -1908,14 +2151,37 @@ static int ecc_send(struct wd_alg_driver *drv, handle_t ctx, void *ecc_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_ecc_msg *msg = ecc_msg; + struct map_info_cache cache = {0}; + int ret; + + ret = check_hpre_mem_params(msg->mm_ops, msg->mm_type); + if (ret) { + WD_ERR("ecc memory params are invalid, ret is %d!\n", ret); + return ret; + } hisi_set_msg_id(h_qp, &msg->tag); - if (msg->req.op_type == WD_SM2_ENCRYPT) - return sm2_enc_send(ctx, msg); - else if (msg->req.op_type == WD_SM2_DECRYPT) - return sm2_dec_send(ctx, msg); + if (msg->req.op_type == WD_SM2_ENCRYPT) { + ret = sm2_enc_send(ctx, msg, &cache); + if (ret) + goto ecc_fail; + return ret; + } else if (msg->req.op_type == WD_SM2_DECRYPT) { + ret = sm2_dec_send(ctx, msg, &cache); + if (ret) + goto ecc_fail; + return ret; + } - return ecc_general_send(ctx, msg); + ret = ecc_general_send(ctx, msg, &cache); + if (ret) + goto ecc_fail; + + return ret; + +ecc_fail: + unmap_addr_in_cache(msg->mm_ops, &cache); + return ret; } static int ecdh_out_transfer(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) @@ -2020,14 +2286,14 @@ static int sm2_enc_out_transfer(struct wd_ecc_msg *msg, static int ecc_out_transfer(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, __u8 qp_mode) { + struct wd_ecc_msg *target_msg; int ret = -WD_EINVAL; - void *va; + + target_msg = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); /* async */ - if (qp_mode == CTX_MODE_ASYNC) { - va = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - msg->req.dst = container_of(va, struct wd_ecc_out, data); - } + if (qp_mode == CTX_MODE_ASYNC) + msg->req.dst = target_msg->req.dst; if (hw_msg->alg == HPRE_ALG_SM2_SIGN || hw_msg->alg == HPRE_ALG_ECDSA_SIGN) @@ -2319,19 +2585,16 @@ static int sm2_convert_dec_out(struct wd_ecc_msg *src, static int ecc_sqe_parse(struct hisi_qp *qp, struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) { - struct wd_ecc_msg *temp_msg; + struct wd_ecc_msg *target_msg; + size_t ilen = 0; + size_t olen = 0; + __u16 kbytes; int ret; - msg->tag = LW_U16(hw_msg->low_tag); - if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { - temp_msg = wd_ecc_get_msg(qp->q_info.idx, msg->tag); - if (!temp_msg) { - WD_ERR("failed to get send msg!
idx = %u, tag = %u.\n", - qp->q_info.idx, msg->tag); - return -WD_EINVAL; - } - } else { - temp_msg = msg; + target_msg = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!target_msg) { + WD_ERR("failed to get correct ecc send msg from hardware!\n"); + return -WD_ADDR_ERR; } hpre_result_check(hw_msg, &msg->result); @@ -2347,10 +2610,20 @@ static int ecc_sqe_parse(struct hisi_qp *qp, struct wd_ecc_msg *msg, goto dump_err_msg; } - return ret; + return WD_SUCCESS; dump_err_msg: - dump_hpre_msg(temp_msg, WD_ECC); + kbytes = target_msg->key_bytes; + if (!target_msg->mm_ops->sva_mode) { + ecc_get_io_len(hw_msg->alg, kbytes, &ilen, &olen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_OUT, hw_msg, + target_msg->req.dst, olen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_KEY, hw_msg, + target_msg->key, kbytes); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_IN, hw_msg, + target_msg->req.src, ilen); + } + dump_hpre_msg(target_msg, WD_ECC); return ret; } @@ -2363,8 +2636,6 @@ static int parse_second_sqe(handle_t h_qp, struct wd_ecc_msg *dst; __u16 recv_cnt = 0; int cnt = 0; - void *data; - __u32 hsz; int ret; while (1) { @@ -2380,10 +2651,12 @@ static int parse_second_sqe(handle_t h_qp, break; } - data = VA_ADDR(hw_msg.hi_out, hw_msg.low_out); - hsz = (hw_msg.task_len1 + 1) * BYTE_BITS; - dst = *(struct wd_ecc_msg **)((uintptr_t)data + - hsz * ECDH_OUT_PARAM_NUM); + dst = (struct wd_ecc_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!dst) { + WD_ERR("failed to get correct sm2 enc second send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + ret = ecc_sqe_parse((struct hisi_qp *)h_qp, dst, &hw_msg); msg->result = dst->result; *second = dst; @@ -2394,19 +2667,17 @@ static int parse_second_sqe(handle_t h_qp, static int sm2_enc_parse(handle_t h_qp, struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) { - __u16 tag = LW_U16(hw_msg->low_tag); struct wd_ecc_msg *second = NULL; struct wd_ecc_msg *first; struct wd_ecc_msg src; - void *data; - __u32 hsz; int ret; - msg->tag = tag; - data = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - hsz = (hw_msg->task_len1 + 1) * BYTE_BITS; - first = *(struct wd_ecc_msg **)((uintptr_t)data + - hsz * ECDH_OUT_PARAM_NUM); + first = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!first) { + WD_ERR("failed to get correct sm2 enc msg from hardware!\n"); + return -WD_ADDR_ERR; + } + memcpy(&src, first + 1, sizeof(src)); /* parse first sqe */ @@ -2440,17 +2711,15 @@ free_first: static int sm2_dec_parse(handle_t ctx, struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) { - __u16 tag = LW_U16(hw_msg->low_tag); struct wd_ecc_msg *dst; struct wd_ecc_msg src; - void *data; - __u32 hsz; int ret; - data = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - hsz = (hw_msg->task_len1 + 1) * BYTE_BITS; - dst = *(struct wd_ecc_msg **)((uintptr_t)data + - hsz * ECDH_OUT_PARAM_NUM); + dst = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!dst) { + WD_ERR("failed to get correct sm2 dec msg from hardware!\n"); + return -WD_ADDR_ERR; + } memcpy(&src, dst + 1, sizeof(src)); /* parse first sqe */ @@ -2460,7 +2729,6 @@ static int sm2_dec_parse(handle_t ctx, struct wd_ecc_msg *msg, goto fail; } msg->result = dst->result; - msg->tag = tag; ret = sm2_convert_dec_out(&src, dst); if (unlikely(ret)) { @@ -2469,7 +2737,7 @@ static int sm2_dec_parse(handle_t ctx, struct wd_ecc_msg *msg, } fail: - free(dst->req.dst); + dst->mm_ops->free(dst->mm_ops->usr, dst->req.dst); free(dst); return ret; @@ -2479,6 +2747,7 @@ static int 
ecc_recv(struct wd_alg_driver *drv, handle_t ctx, void *ecc_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_ecc_msg *msg = ecc_msg; + struct wd_ecc_msg *target_msg; struct hisi_hpre_sqe hw_msg; __u16 recv_cnt = 0; int ret; @@ -2487,10 +2756,17 @@ static int ecc_recv(struct wd_alg_driver *drv, handle_t ctx, void *ecc_msg) if (ret) return ret; - ret = hisi_check_bd_id(h_qp, msg->tag, hw_msg.low_tag); + target_msg = (struct wd_ecc_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + + ret = hisi_check_bd_id(h_qp, msg->tag, target_msg->tag); if (ret) return ret; + msg->tag = target_msg->tag; if (hw_msg.alg == HPRE_ALG_ECDH_MULTIPLY && hw_msg.sm2_mlen == HPRE_SM2_ENC) return sm2_enc_parse(h_qp, msg, &hw_msg); @@ -2549,7 +2825,7 @@ static bool is_valid_hw_type(struct wd_alg_driver *drv) hpre_ctx = (struct hisi_hpre_ctx *)drv->priv; qp = (struct hisi_qp *)wd_ctx_get_priv(hpre_ctx->config.ctxs[0].ctx); - if (qp->q_info.hw_type < HISI_QM_API_VER3_BASE) + if (!qp || qp->q_info.hw_type < HISI_QM_API_VER3_BASE) return false; return true; } diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 9251b4c..216b80a 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -473,7 +473,7 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) { struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct hisi_qm_queue_info *q_info; - __u16 free_num, send_num; + __u16 free_num; __u16 tail; if (unlikely(!qp || !req || !count)) @@ -488,11 +488,14 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) return -WD_EBUSY; } - send_num = expect > free_num ? free_num : expect; + if (expect > free_num) { + pthread_spin_unlock(&q_info->sd_lock); + return -WD_EBUSY; + } tail = q_info->sq_tail_index; - hisi_qm_fill_sqe(req, q_info, tail, send_num); - tail = (tail + send_num) % q_info->sq_depth; + hisi_qm_fill_sqe(req, q_info, tail, expect); + tail = (tail + expect) % q_info->sq_depth; /* * Before sending doorbell, check the queue status, @@ -510,9 +513,9 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) q_info->sq_tail_index = tail; /* Make sure used_num is changed before the next thread gets free sqe. 
*/ - __atomic_add_fetch(&q_info->used_num, send_num, __ATOMIC_RELAXED); + __atomic_add_fetch(&q_info->used_num, expect, __ATOMIC_RELAXED); pthread_spin_unlock(&q_info->sd_lock); - *count = send_num; + *count = expect; return 0; } diff --git a/include/drv/wd_dh_drv.h b/include/drv/wd_dh_drv.h index d205dc4..d2a6157 100644 --- a/include/drv/wd_dh_drv.h +++ b/include/drv/wd_dh_drv.h @@ -16,12 +16,15 @@ extern "C" { /* DH message format */ struct wd_dh_msg { struct wd_dh_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; __u32 tag; /* User-defined request identifier */ void *g; __u16 gbytes; __u16 key_bytes; /* Input key bytes */ __u8 is_g2; __u8 result; /* Data format, denoted by WD error code */ + __u8 *rsv_out; /* reserved output data pointer */ }; struct wd_dh_msg *wd_dh_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_ecc_drv.h b/include/drv/wd_ecc_drv.h index 6193c8b..b123a9b 100644 --- a/include/drv/wd_ecc_drv.h +++ b/include/drv/wd_ecc_drv.h @@ -48,6 +48,8 @@ extern "C" { /* ECC message format */ struct wd_ecc_msg { struct wd_ecc_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; struct wd_hash_mt hash; __u32 tag; /* User-defined request identifier */ __u8 *key; /* Input key VA, should be DMA buffer */ @@ -55,6 +57,7 @@ struct wd_ecc_msg { __u8 curve_id; /* Ec curve denoted by enum wd_ecc_curve_type */ __u8 result; /* alg op error code */ void *drv_cfg; /* internal driver configuration */ + __u8 *rsv_out; /* reserved output data pointer */ }; struct wd_ecc_pubkey { diff --git a/include/drv/wd_rsa_drv.h b/include/drv/wd_rsa_drv.h index d231ecf..c12f3e0 100644 --- a/include/drv/wd_rsa_drv.h +++ b/include/drv/wd_rsa_drv.h @@ -42,11 +42,14 @@ struct wd_rsa_kg_out { /* RSA message format */ struct wd_rsa_msg { struct wd_rsa_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; __u32 tag; /* User-defined request identifier */ __u16 key_bytes; /* Input key bytes */ __u8 key_type; /* Denoted by enum wd_rsa_key_type */ __u8 result; /* Data format, denoted by WD error code */ __u8 *key; /* Input key VA pointer, should be DMA buffer */ + __u8 *rsv_out; /* reserved output data pointer */ }; struct wd_rsa_msg *wd_rsa_get_msg(__u32 idx, __u32 tag); diff --git a/include/wd_dh.h b/include/wd_dh.h index afc2f7c..235c602 100644 --- a/include/wd_dh.h +++ b/include/wd_dh.h @@ -27,6 +27,8 @@ struct wd_dh_sess_setup { __u16 key_bits; /* DH key bites */ bool is_g2; /* is g2 mode or not */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_dh_req { diff --git a/include/wd_ecc.h b/include/wd_ecc.h index 6f670e2..18c1c0d 100644 --- a/include/wd_ecc.h +++ b/include/wd_ecc.h @@ -116,6 +116,8 @@ struct wd_ecc_sess_setup { struct wd_rand_mt rand; /* rand method from user */ struct wd_hash_mt hash; /* hash method from user */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_ecc_req { diff --git a/include/wd_rsa.h b/include/wd_rsa.h index 2f4e589..9c91432 100644 --- a/include/wd_rsa.h +++ b/include/wd_rsa.h @@ -60,6 +60,8 @@ struct wd_rsa_sess_setup { __u16 key_bits; /* RSA key bits */ bool is_crt; /* CRT mode or not */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; bool wd_rsa_is_crt(handle_t sess); diff --git a/wd_dh.c b/wd_dh.c index 221322f..0c1372c 100644 --- a/wd_dh.c +++ b/wd_dh.c @@ -26,6 +26,8 @@ struct wd_dh_sess { struct wd_dtb g; struct wd_dh_sess_setup setup; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; static struct 
wd_dh_setting { @@ -326,6 +328,8 @@ static int fill_dh_msg(struct wd_dh_msg *msg, struct wd_dh_req *req, memcpy(&msg->req, req, sizeof(*req)); msg->result = WD_EINVAL; msg->key_bytes = sess->key_size; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; if (unlikely(req->pri_bytes < sess->key_size)) { WD_ERR("invalid: req pri bytes %hu is error!\n", req->pri_bytes); @@ -581,6 +585,7 @@ void wd_dh_get_g(handle_t sess, struct wd_dtb **g) handle_t wd_dh_alloc_sess(struct wd_dh_sess_setup *setup) { struct wd_dh_sess *sess; + int ret; if (!setup) { WD_ERR("invalid: alloc dh sess setup NULL!\n"); @@ -606,10 +611,19 @@ handle_t wd_dh_alloc_sess(struct wd_dh_sess_setup *setup) memcpy(&sess->setup, setup, sizeof(*setup)); sess->key_size = setup->key_bits >> BYTE_BITS_SHIFT; - sess->g.data = malloc(sess->key_size); - if (!sess->g.data) + ret = wd_mem_ops_init(wd_dh_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); goto sess_err; + } + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + sess->g.data = sess->mm_ops.alloc(sess->mm_ops.usr, sess->key_size); + if (!sess->g.data) { + WD_ERR("failed to malloc sess g param memory!\n"); + goto sess_err; + } sess->g.bsize = sess->key_size; /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_dh_setting.sched.sched_init( @@ -622,7 +636,7 @@ handle_t wd_dh_alloc_sess(struct wd_dh_sess_setup *setup) return (handle_t)sess; sched_err: - free(sess->g.data); + sess->mm_ops.free(sess->mm_ops.usr, sess->g.data); sess_err: free(sess); return (handle_t)0; @@ -638,7 +652,7 @@ void wd_dh_free_sess(handle_t sess) } if (sess_t->g.data) - free(sess_t->g.data); + sess_t->mm_ops.free(sess_t->mm_ops.usr, sess_t->g.data); if (sess_t->sched_key) free(sess_t->sched_key); diff --git a/wd_ecc.c b/wd_ecc.c index b1971b9..2c88d0a 100644 --- a/wd_ecc.c +++ b/wd_ecc.c @@ -52,6 +52,8 @@ struct wd_ecc_sess { struct wd_ecc_sess_setup setup; struct wd_ecc_extend_ops eops; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_ecc_curve_list { @@ -494,7 +496,7 @@ static void release_ecc_prikey(struct wd_ecc_sess *sess) struct wd_ecc_prikey *prikey = sess->key.prikey; wd_memset_zero(prikey->data, prikey->size); - free(prikey->data); + sess->mm_ops.free(sess->mm_ops.usr, prikey->data); free(prikey); sess->key.prikey = NULL; } @@ -503,7 +505,7 @@ static void release_ecc_pubkey(struct wd_ecc_sess *sess) { struct wd_ecc_pubkey *pubkey = sess->key.pubkey; - free(pubkey->data); + sess->mm_ops.free(sess->mm_ops.usr, pubkey->data); free(pubkey); sess->key.pubkey = NULL; } @@ -522,7 +524,7 @@ static struct wd_ecc_prikey *create_ecc_prikey(struct wd_ecc_sess *sess) } dsz = ECC_PRIKEY_SZ(hsz); - data = malloc(dsz); + data = sess->mm_ops.alloc(sess->mm_ops.usr, dsz); if (!data) { WD_ERR("failed to malloc prikey data, sz = %u!\n", dsz); free(prikey); @@ -551,7 +553,7 @@ static struct wd_ecc_pubkey *create_ecc_pubkey(struct wd_ecc_sess *sess) } dsz = ECC_PUBKEY_SZ(hsz); - data = malloc(dsz); + data = sess->mm_ops.alloc(sess->mm_ops.usr, dsz); if (!data) { WD_ERR("failed to malloc pubkey data, sz = %u!\n", dsz); free(pubkey); @@ -570,7 +572,7 @@ static void release_ecc_in(struct wd_ecc_sess *sess, struct wd_ecc_in *ecc_in) { wd_memset_zero(ecc_in->data, ecc_in->size); - free(ecc_in); + sess->mm_ops.free(sess->mm_ops.usr, ecc_in); } static struct wd_ecc_in *create_ecc_in(struct wd_ecc_sess *sess, __u32 num) @@ -585,7 
+587,7 @@ static struct wd_ecc_in *create_ecc_in(struct wd_ecc_sess *sess, __u32 num) hsz = get_key_bsz(sess->key_size); len = sizeof(struct wd_ecc_in) + hsz * num; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc ecc in, sz = %u!\n", len); return NULL; @@ -613,7 +615,7 @@ static struct wd_ecc_in *create_sm2_sign_in(struct wd_ecc_sess *sess, len = sizeof(struct wd_ecc_in) + ECC_SIGN_IN_PARAM_NUM * ksz + m_len; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc sm2 sign in, sz = %llu!\n", len); return NULL; @@ -653,7 +655,7 @@ static struct wd_ecc_in *create_sm2_enc_in(struct wd_ecc_sess *sess, } len = sizeof(struct wd_ecc_in) + ksz + m_len; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc sm2 enc in, sz = %llu!\n", len); return NULL; @@ -697,7 +699,7 @@ static void *create_sm2_ciphertext(struct wd_ecc_sess *sess, __u32 m_len, *len = (__u64)st_sz + ECC_POINT_PARAM_NUM * (__u64)sess->key_size + (__u64)m_len + (__u64)h_byts; - start = malloc(*len); + start = sess->mm_ops.alloc(sess->mm_ops.usr, *len); if (unlikely(!start)) { WD_ERR("failed to alloc start, sz = %llu!\n", *len); return NULL; @@ -745,7 +747,7 @@ static struct wd_ecc_out *create_ecc_out(struct wd_ecc_sess *sess, __u32 num) hsz = get_key_bsz(sess->key_size); len = sizeof(struct wd_ecc_out) + hsz * num; - out = malloc(len); + out = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!out) { WD_ERR("failed to malloc out, sz = %u!\n", len); return NULL; @@ -1149,13 +1151,13 @@ static void del_sess_key(struct wd_ecc_sess *sess) { if (sess->key.prikey) { wd_memset_zero(sess->key.prikey->data, sess->key.prikey->size); - free(sess->key.prikey->data); + sess->mm_ops.free(sess->mm_ops.usr, sess->key.prikey->data); free(sess->key.prikey); sess->key.prikey = NULL; } if (sess->key.pubkey) { - free(sess->key.pubkey->data); + sess->mm_ops.free(sess->mm_ops.usr, sess->key.pubkey->data); free(sess->key.pubkey); sess->key.pubkey = NULL; } @@ -1227,6 +1229,15 @@ handle_t wd_ecc_alloc_sess(struct wd_ecc_sess_setup *setup) memcpy(&sess->setup, setup, sizeof(*setup)); sess->key_size = BITS_TO_BYTES(setup->key_bits); + /* Memory type set */ + ret = wd_mem_ops_init(wd_ecc_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + goto sess_err; + } + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + if (wd_ecc_setting.driver->get_extend_ops) { ret = wd_ecc_setting.driver->get_extend_ops(&sess->eops); if (ret) { @@ -1508,9 +1519,10 @@ void wd_ecxdh_get_out_params(struct wd_ecc_out *out, struct wd_ecc_point **pbk) void wd_ecc_del_in(handle_t sess, struct wd_ecc_in *in) { + struct wd_ecc_sess *sess_t = (struct wd_ecc_sess *)sess; __u32 bsz; - if (!in) { + if (!sess_t || !in) { WD_ERR("invalid: del ecc in parameter error!\n"); return; } @@ -1522,14 +1534,15 @@ void wd_ecc_del_in(handle_t sess, struct wd_ecc_in *in) } wd_memset_zero(in->data, bsz); - free(in); + sess_t->mm_ops.free(sess_t->mm_ops.usr, in); } -void wd_ecc_del_out(handle_t sess, struct wd_ecc_out *out) +void wd_ecc_del_out(handle_t sess, struct wd_ecc_out *out) { + struct wd_ecc_sess *sess_t = (struct wd_ecc_sess *)sess; __u32 bsz; - if (!out) { + if (!sess_t || !out) { WD_ERR("invalid: del ecc out parameter error!\n"); return; } @@ -1541,7 +1554,7 @@ void wd_ecc_del_out(handle_t sess, struct wd_ecc_out *out) } 
wd_memset_zero(out->data, bsz); - free(out); + sess_t->mm_ops.free(sess_t->mm_ops.usr, out); } static int fill_ecc_msg(struct wd_ecc_msg *msg, struct wd_ecc_req *req, @@ -1551,6 +1564,8 @@ static int fill_ecc_msg(struct wd_ecc_msg *msg, struct wd_ecc_req *req, memcpy(&msg->req, req, sizeof(msg->req)); memcpy(&msg->hash, &sess->setup.hash, sizeof(msg->hash)); + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; msg->key_bytes = sess->key_size; msg->curve_id = sess->setup.cv.cfg.id; msg->drv_cfg = sess->eops.params; @@ -1922,7 +1937,7 @@ static struct wd_ecc_in *create_sm2_verf_in(struct wd_ecc_sess *sess, hsz = get_key_bsz(sess->key_size); len = sizeof(struct wd_ecc_in) + ECC_VERF_IN_PARAM_NUM * hsz + m_len; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc sm2 verf in, sz = %llu!\n", len); return NULL; @@ -2211,7 +2226,7 @@ struct wd_ecc_out *wd_sm2_new_dec_out(handle_t sess, __u32 plaintext_len) } len = sizeof(*ecc_out) + plaintext_len; - ecc_out = malloc(len); + ecc_out = sess_t->mm_ops.alloc(sess_t->mm_ops.usr, len); if (!ecc_out) { WD_ERR("failed to malloc ecc_out, sz = %llu!\n", len); return NULL; diff --git a/wd_rsa.c b/wd_rsa.c index cf9239c..bc78c6a 100644 --- a/wd_rsa.c +++ b/wd_rsa.c @@ -67,6 +67,8 @@ struct wd_rsa_sess { struct wd_rsa_prikey *prikey; struct wd_rsa_sess_setup setup; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; static struct wd_rsa_setting { @@ -373,6 +375,8 @@ static int fill_rsa_msg(struct wd_rsa_msg *msg, struct wd_rsa_req *req, memcpy(&msg->req, req, sizeof(*req)); msg->key_bytes = sess->key_size; msg->result = WD_EINVAL; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; switch (msg->req.op_type) { case WD_RSA_SIGN: @@ -641,7 +645,7 @@ struct wd_rsa_kg_in *wd_rsa_new_kg_in(handle_t sess, struct wd_dtb *e, } kg_in_size = (int)GEN_PARAMS_SZ(c->key_size); - kg_in = malloc(kg_in_size + sizeof(*kg_in)); + kg_in = c->mm_ops.alloc(c->mm_ops.usr, kg_in_size + sizeof(*kg_in)); if (!kg_in) { WD_ERR("failed to malloc kg_in memory!\n"); return NULL; @@ -681,19 +685,16 @@ void wd_rsa_get_kg_in_params(struct wd_rsa_kg_in *kin, struct wd_dtb *e, p->data = (void *)kin->p; } -static void del_kg(void *k) +void wd_rsa_del_kg_in(handle_t sess, struct wd_rsa_kg_in *ki) { - if (!k) { + struct wd_rsa_sess *c = (struct wd_rsa_sess *)sess; + + if (!c || !ki) { WD_ERR("invalid: del key generate params err!\n"); return; } - free(k); -} - -void wd_rsa_del_kg_in(handle_t sess, struct wd_rsa_kg_in *ki) -{ - del_kg(ki); + c->mm_ops.free(c->mm_ops.usr, ki); } struct wd_rsa_kg_out *wd_rsa_new_kg_out(handle_t sess) @@ -719,7 +720,7 @@ struct wd_rsa_kg_out *wd_rsa_new_kg_out(handle_t sess) else kg_out_size = (int)GEN_PARAMS_SZ(c->key_size); - kg_out = malloc(kg_out_size + sizeof(*kg_out)); + kg_out = c->mm_ops.alloc(c->mm_ops.usr, kg_out_size + sizeof(*kg_out)); if (!kg_out) { WD_ERR("failed to malloc kg_out memory!\n"); return NULL; @@ -741,13 +742,15 @@ struct wd_rsa_kg_out *wd_rsa_new_kg_out(handle_t sess) void wd_rsa_del_kg_out(handle_t sess, struct wd_rsa_kg_out *kout) { - if (!kout) { + struct wd_rsa_sess *c = (struct wd_rsa_sess *)sess; + + if (!c || !kout) { WD_ERR("invalid: param null at del kg out!\n"); return; } wd_memset_zero(kout->data, kout->size); - del_kg(kout); + c->mm_ops.free(c->mm_ops.usr, kout); } void wd_rsa_get_kg_out_params(struct wd_rsa_kg_out *kout, struct wd_dtb *d, @@ -804,6 +807,11 @@ void wd_rsa_set_kg_out_crt_psz(struct wd_rsa_kg_out *kout, size_t dq_sz, 
size_t dp_sz) { + if (!kout) { + WD_ERR("invalid: input null when set kg out crt psz!\n"); + return; + } + kout->qinvbytes = qinv_sz; kout->dqbytes = dq_sz; kout->dpbytes = dp_sz; @@ -813,6 +821,11 @@ void wd_rsa_set_kg_out_psz(struct wd_rsa_kg_out *kout, size_t d_sz, size_t n_sz) { + if (!kout) { + WD_ERR("invalid: input null when set kg out psz!\n"); + return; + } + kout->dbytes = d_sz; kout->nbytes = n_sz; } @@ -862,7 +875,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, if (setup->is_crt) { len = sizeof(struct wd_rsa_prikey) + (int)CRT_PARAMS_SZ(sess->key_size); - sess->prikey = malloc(len); + sess->prikey = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!sess->prikey) { WD_ERR("failed to alloc sess prikey2!\n"); return -WD_ENOMEM; @@ -873,7 +886,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, } else { len = sizeof(struct wd_rsa_prikey) + (int)GEN_PARAMS_SZ(sess->key_size); - sess->prikey = malloc(len); + sess->prikey = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!sess->prikey) { WD_ERR("failed to alloc sess prikey1!\n"); return -WD_ENOMEM; @@ -885,7 +898,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, len = sizeof(struct wd_rsa_pubkey) + (int)GEN_PARAMS_SZ(sess->key_size); - sess->pubkey = malloc(len); + sess->pubkey = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!sess->pubkey) { free(sess->prikey); WD_ERR("failed to alloc sess pubkey!\n"); @@ -912,8 +925,8 @@ static void del_sess_key(struct wd_rsa_sess *sess) wd_memset_zero(prk->pkey.pkey2.data, CRT_PARAMS_SZ(sess->key_size)); else wd_memset_zero(prk->pkey.pkey1.data, GEN_PARAMS_SZ(sess->key_size)); - free(sess->prikey); - free(sess->pubkey); + sess->mm_ops.free(sess->mm_ops.usr, sess->prikey); + sess->mm_ops.free(sess->mm_ops.usr, sess->pubkey); } static void del_sess(struct wd_rsa_sess *c) @@ -948,6 +961,15 @@ handle_t wd_rsa_alloc_sess(struct wd_rsa_sess_setup *setup) memcpy(&sess->setup, setup, sizeof(*setup)); sess->key_size = setup->key_bits >> BYTE_BITS_SHIFT; + /* Memory type set */ + ret = wd_mem_ops_init(wd_rsa_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + goto sess_err; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; ret = create_sess_key(setup, sess); if (ret) { WD_ERR("failed to create rsa sess keys!\n"); -- 2.33.0
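Taken together with the memory-pool interfaces used later in this series (wd_mempool_alloc, wd_mem_alloc and friends from wd_bmm.h), a caller wires the new mm_ops/mm_type session fields roughly as below. This is a minimal sketch: the pool sizing is illustrative, and the concrete non-SVA value of enum wd_mem_type is not shown in this excerpt, so it is passed in by the caller.

/* Sketch: create a reserved-memory pool on a ctx and bind it to an
 * RSA session through the new mm_ops/mm_type setup fields. */
static handle_t rsa_sess_with_rsv_pool(handle_t h_ctx, enum wd_mem_type mm_type)
{
	struct wd_mempool_setup pool_setup = {
		.block_size = 2560,	/* illustrative: fits 2048-bit key material */
		.block_num = 128,	/* illustrative pool depth */
		.align_size = 128,
	};
	struct wd_rsa_sess_setup setup = {0};
	void *pool;

	pool = wd_mempool_alloc(h_ctx, &pool_setup);
	if (!pool)
		return (handle_t)0;

	setup.key_bits = 2048;
	setup.mm_type = mm_type;
	setup.mm_ops.usr = pool;
	setup.mm_ops.alloc = (void *)wd_mem_alloc;
	setup.mm_ops.free = (void *)wd_mem_free;
	setup.mm_ops.iova_map = (void *)wd_mem_map;
	setup.mm_ops.iova_unmap = (void *)wd_mem_unmap;
	setup.mm_ops.get_bufsize = (void *)wd_get_bufsize;

	/* wd_rsa_alloc_sess() runs wd_mem_ops_init() and then allocates
	 * session buffers through these ops instead of malloc(). */
	return wd_rsa_alloc_sess(&setup);
}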
From: lizhi <lizhi206@huawei.com> The newly added tool interface can support unified verification scenarios for hpre sva and nosva interfaces. Signed-off-by: lizhi <lizhi206@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/hpre_protocol_data.h | 7 + uadk_tool/benchmark/hpre_uadk_benchmark.c | 323 +++++++++++++++++++--- uadk_tool/benchmark/hpre_wd_benchmark.c | 15 +- 3 files changed, 304 insertions(+), 41 deletions(-) diff --git a/uadk_tool/benchmark/hpre_protocol_data.h b/uadk_tool/benchmark/hpre_protocol_data.h index 7bdb942..dcb5c85 100644 --- a/uadk_tool/benchmark/hpre_protocol_data.h +++ b/uadk_tool/benchmark/hpre_protocol_data.h @@ -1257,6 +1257,13 @@ static char ecdh_da_secp256k1[] = { 0x64, 0xfc, 0xbe, 0x4e, 0x44, 0x4a, 0xc6, 0x54, 0x71, 0x69, 0x2e, 0x4a, 0x46, 0xa7, 0x2d, 0xa3 }; +static char x448_da[] = { + 0x71, 0x71, 0xb4, 0x5b, 0x79, 0x51, 0x94, 0x70, 0x53, 0xf9, 0x77, 0x02, 0x64, 0xef, 0xc4, 0xdb, + 0x64, 0xfc, 0xbe, 0x4e, 0x44, 0x4a, 0xc6, 0x54, 0x71, 0x69, 0x2e, 0x4a, 0x46, 0xa7, 0x2d, 0xa3, + 0x68, 0xae, 0x87, 0x7c, 0x45, 0xb3, 0x8b, 0xa8, 0xa8, 0x8e, 0x4b, 0xe5, 0x1f, 0x4e, 0xe6, 0x89, + 0xe2, 0x74, 0x69, 0xc8, 0x17, 0x6c, 0x0d, 0xca +}; + #if 0 static char ecdh_db_secp256k1[] = { 0xe2, 0x74, 0x69, 0xc8, 0x17, 0x6c, 0x0d, 0xca, 0xdd, 0x9e, 0xf6, 0x2f, 0x30, 0x9f, 0xad, 0xf3, diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c index 1457236..fa26a61 100644 --- a/uadk_tool/benchmark/hpre_uadk_benchmark.c +++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c @@ -10,12 +10,17 @@ #include "include/wd_dh.h" #include "include/wd_ecc.h" #include "include/wd_sched.h" +#include "include/wd_bmm.h" +#include "include/wd_sched.h" #define ECC_CURVE_SECP256R1 0x3 /* default set with secp256r1 */ #define HPRE_TST_PRT printf #define ERR_OPTYPE 0xFF #define SM2_DG_SZ 1024 #define WD_SECP256R1 0x18 /* consistent with wd_ecc.c */ +#define SQE_SIZE 128 +#define POOL_MULTIPLY_FACTOR 2 +#define HPRE_OP_TYPE_MAX 6 struct hpre_rsa_key_in { void *e; @@ -87,6 +92,10 @@ struct hpre_ecc_setup { //----------------------------------ECC param-------------------------------------// +struct thread_pool { + void *rsv_pool; +} hpre_uadk_pool; + typedef struct uadk_thread_res { u32 subtype; u32 keybits; @@ -94,12 +103,14 @@ typedef struct uadk_thread_res { u32 optype; u32 td_id; u32 algtype; + int mm_type; } thread_data; static struct wd_ctx_config g_ctx_cfg; static struct wd_sched *g_sched; static unsigned int g_thread_num; static unsigned int g_ctxnum; +static unsigned int g_dev_id; static const char* const alg_operations[] = { "GenKey", "ShareKey", "Encrypt", "Decrypt", "Sign", "Verify", @@ -340,6 +351,7 @@ static int hpre_uadk_param_parse(thread_data *tddata, struct acc_option *options tddata->kmode = mode; tddata->optype = optype; tddata->algtype = algtype; + tddata->mm_type = options->mem_type; HPRE_TST_PRT("%s to run %s task!\n", options->algclass, alg_operations[options->optype]); @@ -404,6 +416,7 @@ static int specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); wd_free_list_accels(list); return 0; @@ -446,7 +459,7 @@ static int non_specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } - + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); free(dev); } @@ -489,17 +502,32 @@ static int init_hpre_ctx_config(struct acc_option 
*options) switch(subtype) { case RSA_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_rsa_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, HPRE_OP_TYPE_MAX, + max_node, wd_rsa_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, HPRE_OP_TYPE_MAX, + max_node, wd_rsa_poll_ctx); break; case DH_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_dh_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, HPRE_OP_TYPE_MAX, + max_node, wd_dh_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, HPRE_OP_TYPE_MAX, + max_node, wd_dh_poll_ctx); break; case ECDH_TYPE: case ECDSA_TYPE: case SM2_TYPE: case X25519_TYPE: case X448_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_ecc_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, HPRE_OP_TYPE_MAX, + max_node, wd_ecc_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, HPRE_OP_TYPE_MAX, + max_node, wd_ecc_poll_ctx); break; default: HPRE_TST_PRT("failed to parse alg subtype!\n"); @@ -516,6 +544,7 @@ static int init_hpre_ctx_config(struct acc_option *options) param.mode = mode; param.begin = 0; param.end = g_ctxnum - 1; + param.dev_id = g_dev_id; ret = wd_sched_rr_instance(g_sched, &param); if (ret) { HPRE_TST_PRT("failed to fill hpre sched data!\n"); @@ -651,15 +680,24 @@ static int init_hpre_ctx_config2(struct acc_option *options) /* init2 */ switch (subtype) { case RSA_TYPE: - return wd_rsa_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + return wd_rsa_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + return wd_rsa_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); case DH_TYPE: - return wd_dh_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + return wd_dh_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + return wd_dh_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); case ECDH_TYPE: case ECDSA_TYPE: case SM2_TYPE: case X25519_TYPE: case X448_TYPE: - return wd_ecc_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + return wd_ecc_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + return wd_ecc_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); default: HPRE_TST_PRT("failed to parse alg subtype on uninit2!\n"); return -EINVAL; @@ -671,6 +709,108 @@ out_freectx: return ret; } +static int hpre_uadk_get_block(u32 algtype) +{ + int block_size = 512; + + switch (algtype) { + case RSA_1024: + block_size = 1280; + break; + case RSA_2048: + block_size = 2560; + break; + case RSA_3072: + block_size = 3840; + break; + case RSA_4096: + block_size = 5120; + break; + case RSA_1024_CRT: + block_size = 1280; + break; + case RSA_2048_CRT: + block_size = 2560; + break; + case RSA_3072_CRT: + block_size = 3840; + break; + case RSA_4096_CRT: + block_size = 5120; + break; + case DH_768: + block_size = 1536; + break; + case DH_1024: + block_size = 2048; + break; + case DH_1536: + block_size = 3072; + break; + case DH_2048: + block_size = 4096; + break; + case DH_3072: + block_size = 6144; + break; + case DH_4096: + block_size = 8192; + break; + default: + block_size = 576; + break; + } + + return block_size; +} + +static int init_uadk_rsv_pool(struct acc_option *options) +{ + struct wd_mempool_setup pool_setup; + char *alg = options->algclass; + handle_t h_ctx; + unsigned long
step; + + /* ctxs is NULL */ + h_ctx = wd_find_ctx(alg); + if (!h_ctx) { + HPRE_TST_PRT("Failed to find a ctx for alg:%s\n", options->algname); + return -EINVAL; + } + g_ctx_cfg.priv = (void *)h_ctx; + + step = hpre_uadk_get_block(options->algtype); + + pool_setup.block_size = step; + pool_setup.block_num = g_thread_num * MAX_POOL_LENTH * POOL_MULTIPLY_FACTOR; + pool_setup.align_size = SQE_SIZE; + pool_setup.ops.alloc = NULL; + pool_setup.ops.free = NULL; + + hpre_uadk_pool.rsv_pool = wd_mempool_alloc(h_ctx, &pool_setup); + if (!hpre_uadk_pool.rsv_pool) { + HPRE_TST_PRT("Failed to create block pool\n"); + return -ENOMEM; + } + + pool_setup.ops.alloc = (void *)wd_mem_alloc; + pool_setup.ops.free = (void *)wd_mem_free; + pool_setup.ops.iova_map = (void *)wd_mem_map; + pool_setup.ops.iova_unmap = (void *)wd_mem_unmap; + pool_setup.ops.get_bufsize = (void *)wd_get_bufsize; + pool_setup.ops.usr = hpre_uadk_pool.rsv_pool; + + return 0; +} + +static void free_uadk_rsv_pool(struct acc_option *option) +{ + handle_t h_ctx = (handle_t)g_ctx_cfg.priv; + + if (hpre_uadk_pool.rsv_pool) + wd_mempool_free(h_ctx, hpre_uadk_pool.rsv_pool); + hpre_uadk_pool.rsv_pool = NULL; +} /*-------------------------------uadk benchmark main code-------------------------------------*/ void *hpre_uadk_poll(void *data) @@ -1049,7 +1189,7 @@ static int get_ecc_curve(struct hpre_ecc_setup *setup, u32 cid) return 0; } -static int get_ecc_key_param(struct wd_ecc_curve *param, u32 key_bits) +static int get_ecc_key_param(struct wd_ecc_curve *param, u32 key_bits) { u32 key_size = (key_bits + 7) / 8; @@ -1188,12 +1328,22 @@ static int get_ecc_param_from_sample(struct hpre_ecc_setup *setup, setup->sign_size = sizeof(sm2_sign_data); } else { - /* x448, x25519 and ecdh-256 can share same private key of ecdh_da_secp256k1*/ - setup->priv_key = ecdh_da_secp256k1; + /* + * x25519 and ecdh-256 can share same 32-bytes private key of + * ecdh_da_secp256k1, while x448 should use 56-byte private key + * to get accurate performance. 
+ */ + if (subtype == X448_TYPE) + setup->priv_key = x448_da; + else + setup->priv_key = ecdh_da_secp256k1; setup->except_pub_key = ecdh_except_b_pubkey_secp256k1; setup->pub_key = ecdh_cp_pubkey_secp256k1; setup->share_key = ecdh_cp_sharekey_secp256k1; - setup->priv_key_size = sizeof(ecdh_da_secp256k1); + if (subtype == X448_TYPE) + setup->priv_key_size = sizeof(x448_da); + else + setup->priv_key_size = sizeof(ecdh_da_secp256k1); setup->except_pub_key_size = sizeof(ecdh_except_b_pubkey_secp256k1); setup->pub_key_size = sizeof(ecdh_cp_pubkey_secp256k1); setup->share_key_size = sizeof(ecdh_cp_sharekey_secp256k1); @@ -1340,6 +1490,7 @@ static void *rsa_uadk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct wd_rsa_sess_setup setup; struct wd_rsa_req req; void *key_info = NULL; @@ -1349,8 +1500,21 @@ static void *rsa_uadk_sync_run(void *arg) memset(&setup, 0, sizeof(setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + setup.sched_param = (void *)&sc_param; setup.key_bits = pdata->keybits; setup.is_crt = pdata->kmode; + setup.mm_type = pdata->mm_type; + setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + setup.mm_ops.alloc = (void *)wd_mem_alloc; + setup.mm_ops.free = (void *)wd_mem_free; + setup.mm_ops.iova_map = (void *)wd_mem_map; + setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_rsa_alloc_sess(&setup); if (!h_sess) @@ -1372,7 +1536,7 @@ static void *rsa_uadk_sync_run(void *arg) rsa_key_in->p = rsa_key_in->e + key_size; rsa_key_in->q = rsa_key_in->p + (key_size >> 1); - ret = get_rsa_key_from_sample(h_sess, key_info, key_info, + ret = get_rsa_key_from_sample(h_sess, key_info, key_info, pdata->keybits, pdata->kmode); if (ret) { HPRE_TST_PRT("failed to get sample key data!\n"); @@ -1389,14 +1553,14 @@ static void *rsa_uadk_sync_run(void *arg) goto sample_release; } } else { - req.src = malloc(key_size); + req.src = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.src) { HPRE_TST_PRT("failed to alloc rsa in buffer!\n"); goto sample_release; } memset(req.src, 0, req.src_bytes); - memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); - req.dst = malloc(key_size); + memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); + req.dst = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.dst) { HPRE_TST_PRT("failed to alloc rsa out buffer!\n"); goto src_release; @@ -1435,10 +1599,10 @@ static void *rsa_uadk_sync_run(void *arg) dst_release: if (req.dst) - free(req.dst); + setup.mm_ops.free(setup.mm_ops.usr, req.dst); src_release: if (req.src) - free(req.src); + setup.mm_ops.free(setup.mm_ops.usr, req.src); sample_release: free(rsa_key_in); key_release: @@ -1460,6 +1624,7 @@ static void *rsa_uadk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct rsa_async_tag *tag; struct wd_rsa_sess_setup setup; struct wd_rsa_req req; @@ -1471,8 +1636,21 @@ static void *rsa_uadk_async_run(void *arg) memset(&setup, 0, sizeof(setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + setup.sched_param = (void *)&sc_param; setup.key_bits = pdata->keybits; 
setup.is_crt = pdata->kmode; + setup.mm_type = pdata->mm_type; + setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + setup.mm_ops.alloc = (void *)wd_mem_alloc; + setup.mm_ops.free = (void *)wd_mem_free; + setup.mm_ops.iova_map = (void *)wd_mem_map; + setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_rsa_alloc_sess(&setup); if (!h_sess) @@ -1518,14 +1696,14 @@ static void *rsa_uadk_async_run(void *arg) goto tag_release; } } else { - req.src = malloc(key_size); + req.src = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.src) { HPRE_TST_PRT("failed to alloc rsa in buffer!\n"); goto tag_release; } memset(req.src, 0, req.src_bytes); - memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); - req.dst = malloc(key_size); + memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); + req.dst = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.dst) { HPRE_TST_PRT("failed to alloc rsa out buffer!\n"); goto src_release; @@ -1581,10 +1759,10 @@ static void *rsa_uadk_async_run(void *arg) } if (req.dst) - free(req.dst); + setup.mm_ops.free(setup.mm_ops.usr, req.dst); src_release: if (req.src) - free(req.src); + setup.mm_ops.free(setup.mm_ops.usr, req.src); tag_release: free(tag); key_in_release: @@ -1691,27 +1869,27 @@ static int get_dh_param_from_sample(struct hpre_dh_param *setup, return 0; } -static int get_dh_opdata_param(handle_t h_sess, struct wd_dh_req *req, - struct hpre_dh_param *setup, int key_size) +static int get_dh_opdata_param(struct wd_dh_sess_setup *dh_setup, handle_t h_sess, + struct wd_dh_req *req, struct hpre_dh_param *setup, int key_size) { unsigned char *ag_bin = NULL; struct wd_dtb ctx_g; int ret; - ag_bin = malloc(2 * key_size); + ag_bin = dh_setup->mm_ops.alloc(dh_setup->mm_ops.usr, 2 * key_size); if (!ag_bin) return -ENOMEM; memset(ag_bin, 0, 2 * key_size); req->pv = ag_bin; - req->x_p = malloc(2 * key_size); + req->x_p = dh_setup->mm_ops.alloc(dh_setup->mm_ops.usr, 2 * key_size); if (!req->x_p) goto ag_error; memset(req->x_p, 0, 2 * key_size); - req->pri = malloc(2 * key_size); + req->pri = dh_setup->mm_ops.alloc(dh_setup->mm_ops.usr, 2 * key_size); if (!req->pri) goto xp_error; @@ -1748,11 +1926,11 @@ static int get_dh_opdata_param(handle_t h_sess, struct wd_dh_req *req, return 0; ctx_release: - free(req->pri); + dh_setup->mm_ops.free(dh_setup->mm_ops.usr, req->pri); xp_error: - free(req->x_p); + dh_setup->mm_ops.free(dh_setup->mm_ops.usr, req->x_p); ag_error: - free(req->pv); + dh_setup->mm_ops.free(dh_setup->mm_ops.usr, req->pv); return -ENOMEM; } @@ -1766,6 +1944,7 @@ static void *dh_uadk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct wd_dh_sess_setup dh_setup; struct rsa_async_tag *tag; struct hpre_dh_param param; @@ -1777,6 +1956,19 @@ static void *dh_uadk_async_run(void *arg) memset(&dh_setup, 0, sizeof(dh_setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + dh_setup.sched_param = (void *)&sc_param; + dh_setup.mm_type = pdata->mm_type; + dh_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + dh_setup.mm_ops.alloc = (void *)wd_mem_alloc; + dh_setup.mm_ops.free = (void *)wd_mem_free; + dh_setup.mm_ops.iova_map = (void *)wd_mem_map; + dh_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + dh_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; 
dh_setup.key_bits = pdata->keybits; if (pdata->optype == WD_DH_PHASE2) dh_setup.is_g2 = true; // G1 is 0; G2 is 1; @@ -1791,7 +1983,7 @@ static void *dh_uadk_async_run(void *arg) param.optype = pdata->optype; req.op_type = pdata->optype; - ret = get_dh_opdata_param(h_sess, &req, &param, key_size); + ret = get_dh_opdata_param(&dh_setup, h_sess, &req, &param, key_size); if (ret){ HPRE_TST_PRT("failed to fill dh key gen req!\n"); goto sess_release; @@ -1847,9 +2039,9 @@ static void *dh_uadk_async_run(void *arg) free(tag); param_release: - free(req.x_p); - free(req.pv); - free(req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.x_p); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pv); sess_release: wd_dh_free_sess(h_sess); add_send_complete(); @@ -1861,6 +2053,7 @@ static void *dh_uadk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct wd_dh_sess_setup dh_setup; struct hpre_dh_param setup; struct wd_dh_req req; @@ -1870,6 +2063,19 @@ static void *dh_uadk_sync_run(void *arg) memset(&dh_setup, 0, sizeof(dh_setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + dh_setup.sched_param = (void *)&sc_param; + dh_setup.mm_type = pdata->mm_type; + dh_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + dh_setup.mm_ops.alloc = (void *)wd_mem_alloc; + dh_setup.mm_ops.free = (void *)wd_mem_free; + dh_setup.mm_ops.iova_map = (void *)wd_mem_map; + dh_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + dh_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; dh_setup.key_bits = pdata->keybits; if (pdata->optype == WD_DH_PHASE2) dh_setup.is_g2 = true; // G1 is 0; G2 is 1; @@ -1884,7 +2090,7 @@ static void *dh_uadk_sync_run(void *arg) setup.optype = pdata->optype; req.op_type = pdata->optype; - ret = get_dh_opdata_param(h_sess, &req, &setup, key_size); + ret = get_dh_opdata_param(&dh_setup, h_sess, &req, &setup, key_size); if (ret){ HPRE_TST_PRT("failed to fill dh key gen req!\n"); goto param_release; @@ -1903,9 +2109,9 @@ static void *dh_uadk_sync_run(void *arg) } while(true); param_release: - free(req.x_p); - free(req.pv); - free(req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.x_p); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pv); sess_release: wd_dh_free_sess(h_sess); cal_avg_latency(count); @@ -2247,6 +2453,7 @@ static void *ecc_uadk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; u32 cid = ECC_CURVE_SECP256R1; u32 subtype = pdata->subtype; struct wd_ecc_sess_setup sess_setup; @@ -2304,8 +2511,21 @@ static void *ecc_uadk_sync_run(void *arg) } // set def setting; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + sess_setup.sched_param = (void *)&sc_param; sess_setup.hash.cb = hpre_compute_hash; - sess_setup.hash.type = WD_HASH_SHA256; + sess_setup.hash.type = 0; + sess_setup.mm_type = pdata->mm_type; + sess_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + sess_setup.mm_ops.alloc = (void *)wd_mem_alloc; + sess_setup.mm_ops.free = (void *)wd_mem_free; + sess_setup.mm_ops.iova_map = (void *)wd_mem_map; + sess_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; +
sess_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; ret = get_ecc_param_from_sample(&setup, subtype, pdata->keybits); if (ret) @@ -2403,6 +2623,7 @@ static void *ecc_uadk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; u32 cid = ECC_CURVE_SECP256R1; u32 subtype = pdata->subtype; struct wd_ecc_sess_setup sess_setup; @@ -2464,6 +2685,19 @@ static void *ecc_uadk_async_run(void *arg) // set def setting; sess_setup.hash.cb = hpre_compute_hash; sess_setup.hash.type = WD_HASH_SHA256; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + sess_setup.sched_param = (void *)&sc_param; + sess_setup.mm_type = pdata->mm_type; + sess_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + sess_setup.mm_ops.alloc = (void *)wd_mem_alloc; + sess_setup.mm_ops.free = (void *)wd_mem_free; + sess_setup.mm_ops.iova_map = (void *)wd_mem_map; + sess_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + sess_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; ret = get_ecc_param_from_sample(&setup, subtype, pdata->keybits); if (ret) @@ -2630,6 +2864,7 @@ static int hpre_uadk_sync_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].algtype = threads_option.algtype; + threads_args[i].mm_type = threads_option.mm_type; ret = pthread_create(&tdid[i], NULL, uadk_hpre_sync_run, &threads_args[i]); if (ret) { HPRE_TST_PRT("Create sync thread fail!\n"); @@ -2707,6 +2942,7 @@ static int hpre_uadk_async_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].algtype = threads_option.algtype; + threads_args[i].mm_type = threads_option.mm_type; ret = pthread_create(&tdid[i], NULL, uadk_hpre_async_run, &threads_args[i]); if (ret) { HPRE_TST_PRT("Create async thread fail!\n"); @@ -2758,6 +2994,12 @@ int hpre_uadk_benchmark(struct acc_option *options) return ret; } + if (options->mem_type != UADK_AUTO) { + ret = init_uadk_rsv_pool(options); + if (ret) + return ret; + } + get_pid_cpu_time(&ptime); time_start(options->times); if (options->syncmode) @@ -2768,6 +3010,9 @@ int hpre_uadk_benchmark(struct acc_option *options) if (ret) return ret; + if (options->mem_type != UADK_AUTO) + free_uadk_rsv_pool(options); + if (options->inittype == INIT2_TYPE) uninit_hpre_ctx_config2(options->subtype); else diff --git a/uadk_tool/benchmark/hpre_wd_benchmark.c b/uadk_tool/benchmark/hpre_wd_benchmark.c index 60ed3bd..38c45b1 100644 --- a/uadk_tool/benchmark/hpre_wd_benchmark.c +++ b/uadk_tool/benchmark/hpre_wd_benchmark.c @@ -1613,11 +1613,22 @@ static int get_ecc_param_from_sample(struct hpre_ecc_setup *setup, setup->sign_size = sizeof(sm2_sign_data); } else { - setup->priv_key = ecdh_da_secp256k1; + /* + * x25519 and ecdh-256 can share same 32-bytes private key of + * ecdh_da_secp256k1, while x448 should use 56-byte private key + * to get accurate performance. 
+ */ + if (subtype == X448_TYPE) + setup->priv_key = x448_da; + else + setup->priv_key = ecdh_da_secp256k1; setup->except_pub_key = ecdh_except_b_pubkey_secp256k1; setup->pub_key = ecdh_cp_pubkey_secp256k1; setup->share_key = ecdh_cp_sharekey_secp256k1; - setup->priv_key_size = sizeof(ecdh_da_secp256k1); + if (subtype == X448_TYPE) + setup->priv_key_size = sizeof(x448_da); + else + setup->priv_key_size = sizeof(ecdh_da_secp256k1); setup->except_pub_key_size = sizeof(ecdh_except_b_pubkey_secp256k1); setup->pub_key_size = sizeof(ecdh_cp_pubkey_secp256k1); setup->share_key_size = sizeof(ecdh_cp_sharekey_secp256k1); -- 2.33.0
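The key-size split above follows from the curve sizes: a Curve448 scalar is ceil(448/8) = 56 bytes, while curve25519 and 256-bit ECDH scalars are 32 bytes, so reusing the 32-byte secp256k1 sample for x448 would drive the hardware with an artificially short private key. Condensed, the selection both benchmarks now perform is:

	/* Sketch of the per-curve sample-key choice added above. */
	if (subtype == X448_TYPE) {
		setup->priv_key = x448_da;			/* 56 bytes */
		setup->priv_key_size = sizeof(x448_da);
	} else {
		setup->priv_key = ecdh_da_secp256k1;		/* 32 bytes */
		setup->priv_key_size = sizeof(ecdh_da_secp256k1);
	}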
From: Chenghai Huang <huangchenghai2@huawei.com> When zstd uses blk_type to inform the driver that the previous block is an uncompressible block, the repcode of the current compression block should be based on the repcode of the last compressible block, rather than always using the repcode of the previous block regardless of whether it is compressible or not. Use CTX_REPCODE1_OFFSET as the address offset for storing the repcode of the previous block, and CTX_REPCODE2_OFFSET as the address offset for storing the repcode of the last compressible block. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 16 +++++++++------- v1/drv/hisi_zip_udrv.c | 16 +++++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 87d2103..ab2b5e7 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -822,9 +822,14 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, if (msg->ctx_buf) { ctx_buf = msg->ctx_buf + RSV_OFFSET; - if (msg->alg_type == WD_LZ77_ZSTD && data->blk_type != COMP_BLK) - memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET, - msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + if (msg->alg_type == WD_LZ77_ZSTD) { + if (data->blk_type != COMP_BLK) + memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET, + msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + else + memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, + msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + } } fill_buf_size_lz77_zstd(sqe, in_size, lits_size, seq_avail_out); @@ -1203,12 +1208,9 @@ static void get_data_size_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_op_ty data->freq = data->sequences_start + (data->seq_num << SEQ_DATA_SIZE_SHIFT) + OVERFLOW_DATA_SIZE; - if (ctx_buf) { - memcpy(ctx_buf + CTX_REPCODE2_OFFSET, - ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + if (ctx_buf) memcpy(ctx_buf + CTX_REPCODE1_OFFSET, ctx_buf + RSV_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE); - } } } diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index ab4254e..8908e27 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -687,9 +687,14 @@ static void fill_zip_sqe_hw_info_lz77_zstd(void *ssqe, struct wcrypto_comp_msg * sqe->ctx_dw0 = *(__u32 *)msg->ctx_buf; sqe->ctx_dw1 = *(__u32 *)(msg->ctx_buf + CTX_PRIV1_OFFSET); sqe->ctx_dw2 = *(__u32 *)(msg->ctx_buf + CTX_PRIV2_OFFSET); - if (format->blk_type != COMP_BLK) - memcpy(msg->ctx_buf + CTX_HW_REPCODE_OFFSET + CTX_BUFFER_OFFSET, - msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + if (msg->alg_type == WCRYPTO_LZ77_ZSTD) { + if (format->blk_type != COMP_BLK) + memcpy(msg->ctx_buf + CTX_HW_REPCODE_OFFSET + CTX_BUFFER_OFFSET, + msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + else + memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, + msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + } } sqe->isize = msg->isize; @@ -820,13 +825,10 @@ static void fill_priv_lz77_zstd(void *ssqe, struct wcrypto_comp_msg *recv_msg) OVERFLOW_DATA_SIZE; } - if (ctx_buf) { - memcpy(ctx_buf + CTX_REPCODE2_OFFSET, - ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + if (ctx_buf) memcpy(ctx_buf + CTX_REPCODE1_OFFSET, ctx_buf + CTX_BUFFER_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE); - } } int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, -- 2.33.0
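Viewed as one state machine, the two hunks above keep a two-slot repcode history: CTX_REPCODE1_OFFSET always holds the repcode of the block just completed (refreshed from the hardware context on every receive), while CTX_REPCODE2_OFFSET is only promoted from slot 1 when the previous block was compressible. The send-side logic, condensed into one place (names as in hisi_comp.c):

	/* On send: pick the repcode base for the next block. */
	if (data->blk_type != COMP_BLK)
		/* previous block was stored raw: feed the hardware the
		 * repcode of the last block that actually compressed */
		memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET,
		       msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE);
	else
		/* previous block compressed: promote its repcode into
		 * the "last compressible" fallback slot */
		memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET,
		       msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE);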
From: Chenghai Huang <huangchenghai2@huawei.com> When the hardware configures the lit_length register, the stream mode LZ77_ZSTD algorithm needs to clear the lit_length in the context of each block input. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 5 +++++ v1/drv/hisi_zip_udrv.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index ab2b5e7..4dc0c6a 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -89,6 +89,7 @@ #define PRICE_MIN_OUT_SIZE 4096 #define ZSTD_LIT_RESV_SIZE 16 #define REPCODE_SIZE 12 +#define SEQ_LIT_LEN_SIZE 4 #define BUF_TYPE 2 @@ -829,6 +830,10 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, else memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + + /* The literal length info of each bd needs to be cleared. */ + memset(ctx_buf + CTX_HW_REPCODE_OFFSET + REPCODE_SIZE, 0, + SEQ_LIT_LEN_SIZE); } } diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 8908e27..3045e48 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -58,6 +58,7 @@ #define ZSTD_LIT_RSV_SIZE 16 #define ZSTD_FREQ_DATA_SIZE 784 #define REPCODE_SIZE 12 +#define SEQ_LIT_LEN_SIZE 4 #define OVERFLOW_DATA_SIZE 8 /* Error status 0xe indicates that dest_avail_out insufficient */ @@ -694,6 +695,10 @@ static void fill_zip_sqe_hw_info_lz77_zstd(void *ssqe, struct wcrypto_comp_msg * else memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + + /* The literal length info of each bd needs to be cleared. */ + memset(msg->ctx_buf + CTX_HW_REPCODE_OFFSET + CTX_BUFFER_OFFSET + + REPCODE_SIZE, 0, SEQ_LIT_LEN_SIZE); } } -- 2.33.0
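For orientation, the four bytes being cleared sit immediately after the 12-byte repcode area inside the hardware stream context, so the layout this patch assumes is:

	/* Stream-context region touched above (offsets relative to the
	 * hardware context base, i.e. ctx_buf + RSV_OFFSET in the v2
	 * driver or msg->ctx_buf + CTX_BUFFER_OFFSET in v1):
	 *
	 *   CTX_HW_REPCODE_OFFSET + 0   repcodes, REPCODE_SIZE = 12 bytes
	 *   CTX_HW_REPCODE_OFFSET + 12  literal length, SEQ_LIT_LEN_SIZE = 4
	 *                               bytes, zeroed before every block
	 */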
From: Chenghai Huang <huangchenghai2@huawei.com> When decompressing the last block of data in stream mode, there is a possibility that the input data is fully consumed with insufficient output space, but the stream end flag is not returned. Therefore, when the tail packet data is fully consumed, check if the hardware status is 0x2. If it is 0x2, notify the user to send a request with a zero input size to flush the remaining data in the hardware. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 7 +++++++ v1/drv/hisi_zip_udrv.c | 9 +++++++++ v1/drv/hisi_zip_udrv.h | 1 + 3 files changed, 17 insertions(+) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 4dc0c6a..bcc4f17 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -49,12 +49,14 @@ #define LITLEN_OVERFLOW_POS_MASK 0xffffff #define HZ_DECOMP_NO_SPACE 0x01 +#define HZ_DECOMPING_NO_SPACE 0x02 #define HZ_DECOMP_BLK_NOSTART 0x03 #define HZ_NEGACOMPRESS 0x0d #define HZ_CRC_ERR 0x10 #define HZ_DECOMP_END 0x13 #define HZ_CTX_ST_MASK 0x000f +#define HZ_CTX_BFINAL_MASK 0x80 #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff @@ -1591,6 +1593,11 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, if (ctx_st == HZ_DECOMP_NO_SPACE) recv_msg->req.status = WD_EAGAIN; + /* last block no space when decomping, need resend null size req */ + if (ctx_st == HZ_DECOMPING_NO_SPACE && recv_msg->req.src_len == recv_msg->in_cons && + (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK)) + recv_msg->req.status = WD_EAGAIN; + /* * It need to analysis the data cache by hardware. * If the cache data is a complete huffman block, diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 3045e48..4e5ed80 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -49,6 +49,7 @@ #define HW_UNCOMP_DIF_CHECK_ERR 0x12 #define HW_DECOMP_NO_SPACE 0x01 +#define HW_DECOMPING_NO_SPACE 0x02 #define HW_DECOMP_BLK_NOSTART 0x03 #define HW_DECOMP_NO_CRC 0x04 #define ZIP_DIF_LEN 8 @@ -388,6 +389,7 @@ int qm_parse_zip_sqe(void *hw_msg, const struct qm_queue_info *info, { struct wcrypto_comp_msg *recv_msg = info->req_cache[i]; struct hisi_zip_sqe *sqe = hw_msg; + __u16 ctx_bfinal = sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK; __u16 ctx_st = sqe->ctx_dw0 & HZ_CTX_ST_MASK; __u16 lstblk = sqe->dw3 & HZ_LSTBLK_MASK; __u32 status = sqe->dw3 & HZ_STATUS_MASK; @@ -437,6 +439,9 @@ int qm_parse_zip_sqe(void *hw_msg, const struct qm_queue_info *info, info->sqe_parse_priv(sqe, WCRYPTO_COMP, tag->priv); qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); + if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && + ctx_bfinal) + recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; return 1; } @@ -841,6 +846,7 @@ int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, { struct wcrypto_comp_msg *recv_msg = info->req_cache[i]; struct hisi_zip_sqe_v3 *sqe = hw_msg; + __u16 ctx_bfinal = sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK; __u32 ctx_win_len = sqe->ctx_dw2 & CTX_WIN_LEN_MASK; __u16 ctx_st = sqe->ctx_dw0 & HZ_CTX_ST_MASK; __u16 lstblk = sqe->dw3 & HZ_LSTBLK_MASK; @@ -900,6 +906,9 @@ int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, } qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); + if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && + ctx_bfinal) + recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; /* * It need to analysis the data cache by hardware. 
diff --git a/v1/drv/hisi_zip_udrv.h b/v1/drv/hisi_zip_udrv.h index c93b01a..28a9c0f 100644 --- a/v1/drv/hisi_zip_udrv.h +++ b/v1/drv/hisi_zip_udrv.h @@ -120,6 +120,7 @@ struct hisi_zip_sqe_v3 { #define HZ_REF_VTYPE_SHIFT 12 #define HZ_BLK_SIZE_SHIFT 16 #define HZ_CTX_ST_MASK 0x000f +#define HZ_CTX_BFINAL_MASK 0x80 #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff -- 2.33.0
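From the application side, the new status turns the tail of a stream decompression into a short flush loop. A hedged sketch against the v2 wd_comp API (output-buffer management elided; consume_output() is an assumed helper that drains req.dst and points it at fresh space):

	do {
		ret = wd_do_comp_strm(h_sess, &req);
		if (ret)
			break;
		consume_output(&req);	/* assumed helper: drain req.dst */
		req.src_len = 0;	/* input already fully consumed */
	} while (req.status == WD_EAGAIN);	/* hardware still holds data */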
From: Chenghai Huang <huangchenghai2@huawei.com> Add a condition to check if the tail packet is not fully processed. When the bfinal end flag of the tail packet is in the second-to-last segment of data, it may cause the driver to incorrectly assume that the current data is the tail packet. In fact, there is still one more segment of data to be transmitted. Therefore, it is necessary to add a check for the data buffer status to notify the user to continue sending the packet. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 3 ++- v1/drv/hisi_zip_udrv.c | 4 ++-- v1/drv/hisi_zip_udrv.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index bcc4f17..3607ea2 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -57,6 +57,7 @@ #define HZ_CTX_ST_MASK 0x000f #define HZ_CTX_BFINAL_MASK 0x80 +#define HZ_CTX_STORE_MASK 0x7ffff #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff @@ -1595,7 +1596,7 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, /* last block no space when decomping, need resend null size req */ if (ctx_st == HZ_DECOMPING_NO_SPACE && recv_msg->req.src_len == recv_msg->in_cons && - (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK)) + (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK) && (sqe->ctx_dw1 & HZ_CTX_STORE_MASK)) recv_msg->req.status = WD_EAGAIN; /* diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 4e5ed80..44e1545 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -440,7 +440,7 @@ int qm_parse_zip_sqe(void *hw_msg, const struct qm_queue_info *info, qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && - ctx_bfinal) + ctx_bfinal && (sqe->ctx_dw1 & HZ_CTX_STORE_MASK)) recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; return 1; @@ -907,7 +907,7 @@ int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && - ctx_bfinal) + ctx_bfinal && (sqe->ctx_dw1 & HZ_CTX_STORE_MASK)) recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; /* diff --git a/v1/drv/hisi_zip_udrv.h b/v1/drv/hisi_zip_udrv.h index 28a9c0f..1037f43 100644 --- a/v1/drv/hisi_zip_udrv.h +++ b/v1/drv/hisi_zip_udrv.h @@ -121,6 +121,7 @@ struct hisi_zip_sqe_v3 { #define HZ_BLK_SIZE_SHIFT 16 #define HZ_CTX_ST_MASK 0x000f #define HZ_CTX_BFINAL_MASK 0x80 +#define HZ_CTX_STORE_MASK 0x7ffff #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff -- 2.33.0
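Gathered in one place, the full predicate after this patch reads as follows (v2 field names; the v1 variants are structurally identical):

	/* Ask the user for a zero-size resend only when the hardware is
	 * mid-decompression and out of space (ctx status 0x2), the whole
	 * input was consumed, the bfinal flag has been seen, and the
	 * store counter shows output still buffered in hardware. */
	if (ctx_st == HZ_DECOMPING_NO_SPACE &&
	    recv_msg->req.src_len == recv_msg->in_cons &&
	    (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK) &&
	    (sqe->ctx_dw1 & HZ_CTX_STORE_MASK))
		recv_msg->req.status = WD_EAGAIN;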
From: Chenghai Huang <huangchenghai2@huawei.com> During asynchronous performance testing, it is common for devices to become busy. Simply retrying the operation is sufficient. Printing too many busy messages can affect user experience. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- wd_comp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wd_comp.c b/wd_comp.c index 435f5a8..5e62462 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -913,7 +913,9 @@ int wd_do_comp_async(handle_t h_sess, struct wd_comp_req *req) ret = wd_alg_driver_send(wd_comp_setting.driver, ctx->ctx, msg); if (unlikely(ret < 0)) { - WD_ERR("wd comp send error, ret = %d!\n", ret); + if (ret != -WD_EBUSY) + WD_ERR("wd comp send error, ret = %d!\n", ret); + goto fail_with_msg; } -- 2.33.0
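-WD_EBUSY is ordinary backpressure from a full queue, and a typical asynchronous sender already absorbs it in its submit loop, e.g.:

	/* Common submit pattern this change quiets down (sketch; real
	 * callers usually poll completions between retries). */
	do {
		ret = wd_do_comp_async(h_sess, &req);
	} while (ret == -WD_EBUSY);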
From: Chenghai Huang <huangchenghai2@huawei.com> When the requested memory pool size is greater than 1M, a certain amount of redundancy is reserved to avoid memory pool allocation failure. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- v1/wd_bmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1/wd_bmm.c b/v1/wd_bmm.c index c58484f..dd20ff7 100644 --- a/v1/wd_bmm.c +++ b/v1/wd_bmm.c @@ -122,7 +122,7 @@ static int wd_pool_pre_layout(struct wd_queue *q, * ensure that the allocated memory is an integer multiple of 1M. */ if (!sp->br.alloc && !qinfo->iommu_type) - p->act_mem_sz = (p->act_mem_sz + BLK_BALANCE_SZ - 1) & ~(BLK_BALANCE_SZ - 1); + p->act_mem_sz = ((p->act_mem_sz + BLK_BALANCE_SZ - 1) & ~(BLK_BALANCE_SZ - 1)) << 1; return WD_SUCCESS; } -- 2.33.0
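As a worked example, assuming BLK_BALANCE_SZ is the 1M balance size the surrounding comment describes: a computed act_mem_sz of 1.2M previously became 2M after rounding up, and now becomes 4M (rounded up to 2M, then doubled by the << 1), which is where the reserved redundancy comes from.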
From: Chenghai Huang <huangchenghai2@huawei.com> When SMMU is disabled, it is common to encounter issues with finding sufficient contiguous memory during memory pool creation. To ensure the normal operation of performance testing, a certain number of retries should be performed. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/zip_wd_benchmark.c | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index c12edba..fd9dcbc 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -22,6 +22,7 @@ #define MAX_POOL_LENTH_COMP 512 #define CHUNK_SIZE (128 * 1024) #define MAX_UNRECV_PACKET_NUM 2 +#define MAX_POOL_CREATE_FAIL_TIME 10 #define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) @@ -284,6 +285,28 @@ static int zip_wd_param_parse(thread_data *tddata, struct acc_option *options) return 0; } +static int zip_wd_create_single_blkpool(struct thread_bd_res *bd_res, + struct wd_blkpool_setup blksetup) +{ + int retry_cnt = 0; + int ret; + + while (retry_cnt++ <= MAX_POOL_CREATE_FAIL_TIME) { + bd_res->pool = wd_blkpool_create(bd_res->queue, &blksetup); + if (bd_res->pool) + return 0; + + wd_release_queue(bd_res->queue); + ret = wd_request_queue(bd_res->queue); + if (ret) { + ZIP_TST_PRT("retry to request queue fail!\n"); + return ret; + } + } + + return -ENOMEM; +} + static int init_zip_wd_queue(struct acc_option *options) { struct wd_blkpool_setup blksetup; @@ -343,12 +366,12 @@ static int init_zip_wd_queue(struct acc_option *options) blksetup.align_size = ALIGN_SIZE; for (j = 0; j < g_thread_num; j++) { - g_thread_queue.bd_res[j].pool = wd_blkpool_create(g_thread_queue.bd_res[j].queue, &blksetup); - if (!g_thread_queue.bd_res[j].pool) { + ret = zip_wd_create_single_blkpool(&g_thread_queue.bd_res[j], blksetup); + if (ret) { ZIP_TST_PRT("create %dth pool fail!\n", j); - ret = -ENOMEM; goto pool_err; } + pool = g_thread_queue.bd_res[j].pool; g_thread_queue.bd_res[j].bds = malloc(sizeof(struct wd_bd) * MAX_POOL_LENTH_COMP); -- 2.33.0
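Note the loop bound: with retry_cnt starting at zero and a post-increment test against MAX_POOL_CREATE_FAIL_TIME, zip_wd_create_single_blkpool allows up to eleven pool-creation attempts in total (the initial one plus ten retries), and it releases and re-requests the queue between attempts, presumably so that each retry gets a fresh chance at a contiguous reservation instead of hammering the same failed mapping.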
From: Chenghai Huang <huangchenghai2@huawei.com> uadk.cnf can be used to specify which UADK driver shared libraries (.so) should be loaded. Usage Guidelines: 1. Place this file (uadk.cnf) in the same directory as your driver .so files 2. List one driver library per line (e.g., "libhisi_zip.so") Example Configuration: libhisi_zip.so libhisi_sec.so Note: UADK will not load any .so files that are not explicitly listed here. If a listed .so does not exist, it is skipped and the user is informed via a log message. If uadk.cnf is not present, the libraries (.so) are loaded by the default method. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk.cnf | 26 ++++++ wd_util.c | 264 ++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 231 insertions(+), 59 deletions(-) create mode 100644 uadk.cnf diff --git a/uadk.cnf b/uadk.cnf new file mode 100644 index 0000000..23b8f9d --- /dev/null +++ b/uadk.cnf @@ -0,0 +1,26 @@ +# UADK Driver Configuration File +# ============================= +# +# This file specifies which UADK driver shared libraries (.so) should be loaded. +# +# Usage Guidelines: +# 1. Place this file (uadk.cnf) in the same directory as your driver .so files +# 2. List one driver library per line (e.g., "libhisi_zip.so") +# +# Example Configuration: +# libhisi_zip.so +# libhisi_sec.so +# # libhisi_hpre.so (commented out - will not load) +# +# Note: +# UADK will not load any .so files that are not explicitly listed here. +# If a listed .so does not exist, it is skipped and the user is informed via a log message. +# If uadk.cnf is not present, the libraries (.so) are loaded by the default method. + +libhisi_zip.so +libhisi_sec.so +libhisi_hpre.so +libisa_ce.so +libisa_sve.so +libhisi_dae.so +libhisi_udma.so \ No newline at end of file diff --git a/wd_util.c b/wd_util.c index d0d62a1..daa7309 100644 --- a/wd_util.c +++ b/wd_util.c @@ -35,6 +35,7 @@ #define WD_SOFT_ASYNC_CTX 1 #define WD_DRV_LIB_DIR "uadk" +#define WD_DRV_CONF_FILE "uadk.cnf" #define WD_PATH_DIR_NUM 2 @@ -2279,11 +2280,64 @@ static void dladdr_empty(void) { } +static int line_check_valid(char *line) +{ + line[strcspn(line, "\n")] = 0; + if (line[0] == '\0' || line[0] == '#') + return 0; + + if (!strstr(line, ".so")) + return 0; + + return 1; +} + +static int check_uadk_config_file(const char *wd_dir, const char *lib_file) +{ + char *path_buf, *uadk_cnf_path, *line; + int ret = -WD_EINVAL; + FILE *fp; + + path_buf = calloc(WD_PATH_DIR_NUM, PATH_MAX); + if (!path_buf) { + WD_ERR("fail to alloc memory for path_buf.\n"); + return -WD_ENOMEM; + } + + uadk_cnf_path = path_buf; + line = path_buf + PATH_MAX; + + snprintf(uadk_cnf_path, PATH_MAX, "%s/%s/%s", wd_dir, WD_DRV_LIB_DIR, + WD_DRV_CONF_FILE); + fp = fopen(uadk_cnf_path, "r"); + if (!fp) { + ret = 0; + goto free_buf; + } + + while (fgets(line, PATH_MAX, fp)) { + if (!line_check_valid(line)) + continue; + + if (strstr(line, lib_file)) { + ret = 0; + goto close_fp; + } + } + +close_fp: + fclose(fp); +free_buf: + free(path_buf); + return ret; +} + int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) { char *path_buf, *path, *file_path; Dl_info file_info; int len, rc, i; + int ret = 0; /* Get libwd.so file's system path */ rc = dladdr(dladdr_empty, &file_info); @@ -2292,7 +2346,7 @@ int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) return -WD_EINVAL; } - path_buf = calloc(WD_PATH_DIR_NUM, sizeof(char) * PATH_MAX); + path_buf = calloc(WD_PATH_DIR_NUM, PATH_MAX); if (!path_buf) { WD_ERR("fail to
calloc path_buf.\n"); return -WD_ENOMEM; } @@ -2312,28 +2366,32 @@ int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) if (is_dir) { len = snprintf(lib_path, PATH_MAX, "%s/%s", file_path, WD_DRV_LIB_DIR); - if (len >= PATH_MAX) - goto free_path; } else { - len = snprintf(lib_path, PATH_MAX, "%s/%s/%s", file_path, WD_DRV_LIB_DIR, lib_file); - if (len >= PATH_MAX) + /* Confirm whether the corresponding file exists in uadk.cnf */ + ret = check_uadk_config_file(file_path, lib_file); + if (ret) goto free_path; + + len = snprintf(lib_path, PATH_MAX, "%s/%s/%s", + file_path, WD_DRV_LIB_DIR, lib_file); } - if (realpath(lib_path, path) == NULL) { - WD_ERR("invalid: %s: no such file or directory!\n", path); + if (len >= PATH_MAX) { + ret = -WD_EINVAL; goto free_path; } - free(path_buf); - return 0; + if (!realpath(lib_path, path)) { + WD_ERR("invalid: %s: no such file or directory!\n", path); + ret = -WD_EINVAL; + } free_path: free(path_buf); - return -WD_EINVAL; + return ret; } -/** +/* * There are many other .so files in this file directory (/root/lib/), * and it is necessary to screen out valid uadk driver files * through this function. @@ -2361,36 +2419,93 @@ static int file_check_valid(const char *lib_file) return 0; } -void *wd_dlopen_drv(const char *cust_lib_dir) +static void create_lib_to_list(const char *lib_path, struct drv_lib_list **head) { typedef int (*alg_ops)(struct wd_alg_driver *drv); - struct drv_lib_list *node, *head = NULL; - char lib_dir_path[PATH_MAX] = {0}; - char lib_path[PATH_MAX] = {0}; - struct dirent *lib_dir; - alg_ops dl_func = NULL; - DIR *wd_dir; + struct drv_lib_list *node; + alg_ops dl_func; + + node = calloc(1, sizeof(*node)); + if (!node) + return; + + node->dlhandle = dlopen(lib_path, RTLD_NODELETE | RTLD_NOW); + if (!node->dlhandle) { + WD_ERR("failed to open lib file: %s, skipped\n", lib_path); + free(node); + return; + } + + dl_func = dlsym(node->dlhandle, "wd_alg_driver_register"); + if (!dl_func) { + WD_ERR("dlsym failed for %s: %s\n", lib_path, dlerror()); + dlclose(node->dlhandle); + free(node); + return; + } + + if (!*head) { + *head = node; + return; + } + add_lib_to_list(*head, node); +} + +static struct drv_lib_list *load_libraries_from_config(const char *config_path, + const char *lib_dir_path) +{ + char *path_buf, *lib_path, *line; + struct drv_lib_list *head = NULL; + FILE *config_file; int ret; - if (!cust_lib_dir) { - ret = wd_get_lib_file_path(NULL, lib_dir_path, true); - if (ret) - return NULL; - } else { - if (realpath(cust_lib_dir, lib_path) == NULL) { - WD_ERR("invalid: %s: no such file or directory!\n", lib_path); - return NULL; - } - strncpy(lib_dir_path, cust_lib_dir, PATH_MAX - 1); - lib_dir_path[PATH_MAX - 1] = '\0'; + path_buf = calloc(WD_PATH_DIR_NUM, PATH_MAX); + if (!path_buf) { + WD_ERR("fail to alloc memory for path_buf.\n"); + return NULL; } + lib_path = path_buf; + line = path_buf + PATH_MAX; - wd_dir = opendir(lib_dir_path); - if (!wd_dir) { - WD_ERR("UADK driver lib dir: %s not exist!\n", lib_dir_path); + config_file = fopen(config_path, "r"); + if (!config_file) { + WD_ERR("Failed to open config file: %s\n", config_path); + free(path_buf); + return NULL; + } + + /* Read config file line by line */ + while (fgets(line, PATH_MAX, config_file)) { + if (!line_check_valid(line)) + continue; + + ret = snprintf(lib_path, PATH_MAX, "%s/%s", lib_dir_path, line); + if (ret < 0) + break; + + create_lib_to_list(lib_path, &head); + } + + free(path_buf); + fclose(config_file); + return head; +} + +static struct 
drv_lib_list *load_all_libraries(DIR *wd_dir, const char *lib_dir_path) +{ + struct drv_lib_list *head = NULL; + struct dirent *lib_dir; + char *lib_path; + int ret; + + lib_path = calloc(1, PATH_MAX); + if (!lib_path) { + WD_ERR("fail to alloc memory for lib_path.\n"); + return NULL; + } + + rewinddir(wd_dir); /* Ensure we're at the start of the directory */ + while ((lib_dir = readdir(wd_dir)) != NULL) { if (!strncmp(lib_dir->d_name, ".", LINUX_CRTDIR_SIZE) || !strncmp(lib_dir->d_name, "..", LINUX_PRTDIR_SIZE)) @@ -2400,43 +2515,74 @@ void *wd_dlopen_drv(const char *cust_lib_dir) if (ret) continue; - node = calloc(1, sizeof(*node)); - if (!node) - goto free_list; - ret = snprintf(lib_path, PATH_MAX, "%s/%s", lib_dir_path, lib_dir->d_name); if (ret < 0) - goto free_node; + break; - node->dlhandle = dlopen(lib_path, RTLD_NODELETE | RTLD_NOW); - if (!node->dlhandle) { - free(node); - /* there are many other files need to skip */ - continue; - } + create_lib_to_list(lib_path, &head); + } - dl_func = dlsym(node->dlhandle, "wd_alg_driver_register"); - if (dl_func == NULL) { - dlclose(node->dlhandle); - free(node); - continue; + free(lib_path); + return head; +} + +void *wd_dlopen_drv(const char *cust_lib_dir) +{ + char *path_buf, *lib_dir_path, *config_path, *lib_path; + struct drv_lib_list *head = NULL; + int ret, len; + DIR *wd_dir; + + path_buf = calloc(WD_PATH_DIR_NUM + 1, PATH_MAX); + if (!path_buf) { + WD_ERR("fail to alloc memory for path buffers.\n"); + return NULL; + } + + lib_dir_path = path_buf; + config_path = path_buf + PATH_MAX; + lib_path = config_path + PATH_MAX; + + if (!cust_lib_dir) { + ret = wd_get_lib_file_path(NULL, lib_dir_path, true); + if (ret) + goto free_path; + } else { + if (!realpath(cust_lib_dir, lib_path)) { + WD_ERR("invalid: %s: no such file or directory!\n", lib_path); + goto free_path; } - if (!head) - head = node; - else - add_lib_to_list(head, node); + len = snprintf(lib_dir_path, PATH_MAX, "%s", cust_lib_dir); + if (len < 0 || len >= PATH_MAX) + goto free_path; + + lib_dir_path[PATH_MAX - 1] = '\0'; } - closedir(wd_dir); - return (void *)head; + wd_dir = opendir(lib_dir_path); + if (!wd_dir) { + WD_ERR("UADK driver lib dir: %s not exist!\n", lib_dir_path); + goto free_path; + } -free_node: - free(node); -free_list: + len = snprintf(config_path, PATH_MAX, "%s/%s", lib_dir_path, WD_DRV_CONF_FILE); + if (len < 0 || len >= PATH_MAX) + goto close_dir; + + ret = access(config_path, F_OK); + if (!ret) + /* Load specified libraries from config file */ + head = load_libraries_from_config(config_path, lib_dir_path); + else + /* Load all valid .so files */ + head = load_all_libraries(wd_dir, lib_dir_path); + +close_dir: closedir(wd_dir); - wd_dlclose_drv(head); - return NULL; +free_path: + free(path_buf); + return (void *)head; } struct wd_alg_driver *wd_alg_drv_bind(int task_type, const char *alg_name) -- 2.33.0
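In practice the allow-list keeps deployments minimal. For a machine that only accelerates compression, a uadk.cnf placed next to the driver libraries could contain nothing but:

	# load only the ZIP driver; all other .so files stay unloaded
	libhisi_zip.so

Per line_check_valid() above, blank lines and lines starting with '#' are ignored, and a line is only considered at all if it mentions ".so".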
From: Chenghai Huang <huangchenghai2@huawei.com> The sqe addr filling for lz77, lz4, and deflate (including sgl and pbuffer) can be merged into a single interface. Additionally, a struct is used to reduce the number of input parameters. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 115 ++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 3607ea2..7d6f94d 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -156,6 +156,13 @@ struct hisi_comp_buf { struct wd_datalist list_dst; }; +struct hisi_comp_sqe_addr { + void *src_addr; + void *dst_addr; + void *lit_addr; + void *ctx_addr; +}; + struct hisi_zip_sqe { __u32 consumed; __u32 produced; @@ -234,7 +241,7 @@ struct hisi_zip_sqe_ops { }; struct hisi_zip_ctx { - struct wd_ctx_config_internal config; + struct wd_ctx_config_internal config; }; struct comp_sgl { @@ -415,21 +422,29 @@ static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, sqe->dest_avail_out = out_size; } -static void fill_buf_addr_deflate(struct hisi_zip_sqe *sqe, void *src, - void *dst, void *ctx_buf) +static void fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *addr) { - sqe->source_addr_l = lower_32_bits(src); - sqe->source_addr_h = upper_32_bits(src); - sqe->dest_addr_l = lower_32_bits(dst); - sqe->dest_addr_h = upper_32_bits(dst); - sqe->stream_ctx_addr_l = lower_32_bits(ctx_buf); - sqe->stream_ctx_addr_h = upper_32_bits(ctx_buf); + sqe->source_addr_l = lower_32_bits(addr->src_addr); + sqe->source_addr_h = upper_32_bits(addr->src_addr); + sqe->dest_addr_l = lower_32_bits(addr->dst_addr); + sqe->dest_addr_h = upper_32_bits(addr->dst_addr); + + if (addr->lit_addr) { + sqe->literals_addr_l = lower_32_bits(addr->lit_addr); + sqe->literals_addr_h = upper_32_bits(addr->lit_addr); + } + + if (addr->ctx_addr) { + sqe->stream_ctx_addr_l = lower_32_bits(addr->ctx_addr); + sqe->stream_ctx_addr_h = upper_32_bits(addr->ctx_addr); + } } static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg, const char *head, int head_size) { + struct hisi_comp_sqe_addr addr = {0}; __u32 in_size = msg->req.src_len; __u32 out_size = msg->avail_out; struct hisi_comp_buf *buf; @@ -485,7 +500,10 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, if (msg->ctx_buf) ctx_buf = msg->ctx_buf + RSV_OFFSET; - fill_buf_addr_deflate(sqe, src, dst, ctx_buf); + addr.src_addr = src; + addr.dst_addr = dst; + addr.ctx_addr = ctx_buf; + fill_buf_addr(sqe, &addr); return 0; } @@ -508,14 +526,6 @@ static int fill_buf_gzip(handle_t h_qp, struct hisi_zip_sqe *sqe, return fill_buf_deflate_generic(sqe, msg, GZIP_HEADER, GZIP_HEADER_SZ); } -static void fill_buf_addr_lz4(struct hisi_zip_sqe *sqe, void *src, void *dst) -{ - sqe->source_addr_l = lower_32_bits(src); - sqe->source_addr_h = upper_32_bits(src); - sqe->dest_addr_l = lower_32_bits(dst); - sqe->dest_addr_h = upper_32_bits(dst); -} - static int check_lz4_msg(struct wd_comp_msg *msg, enum wd_buff_type buf_type) { /* LZ4 only support for compress and block mode */ @@ -546,6 +556,7 @@ static int check_lz4_msg(struct wd_comp_msg *msg, enum wd_buff_type buf_type) static int fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { + struct hisi_comp_sqe_addr addr = {0}; void *src = msg->req.src; void *dst = msg->req.dst; int ret; @@ -556,7 +567,9 @@ static int 
fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); - fill_buf_addr_lz4(sqe, src, dst); + addr.src_addr = src; + addr.dst_addr = dst; + fill_buf_addr(sqe, &addr); return 0; } @@ -570,10 +583,10 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) sqe->dw9 = val; } -static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_datalist *list_src, - struct wd_datalist *list_dst) +static int fill_buf_addr_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_datalist *list_src, struct wd_datalist *list_dst) { + struct hisi_comp_sqe_addr addr = {0}; struct comp_sgl c_sgl; int ret; @@ -585,7 +598,9 @@ static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(ret)) return ret; - fill_buf_addr_deflate(sqe, c_sgl.in, c_sgl.out, NULL); + addr.src_addr = c_sgl.in; + addr.dst_addr = c_sgl.out; + fill_buf_addr(sqe, &addr); return 0; } @@ -639,7 +654,7 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_type_sgl(sqe); - ret = fill_buf_addr_deflate_sgl(h_qp, sqe, list_src, list_dst); + ret = fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); if (unlikely(ret)) return ret; @@ -699,20 +714,6 @@ static void fill_buf_size_lz77_zstd(struct hisi_zip_sqe *sqe, __u32 in_size, sqe->dest_avail_out = seqs_size; } -static void fill_buf_addr_lz77_zstd(struct hisi_zip_sqe *sqe, - void *src, void *lits_start, - void *seqs_start, void *ctx_buf) -{ - sqe->source_addr_l = lower_32_bits(src); - sqe->source_addr_h = upper_32_bits(src); - sqe->dest_addr_l = lower_32_bits(seqs_start); - sqe->dest_addr_h = upper_32_bits(seqs_start); - sqe->literals_addr_l = lower_32_bits(lits_start); - sqe->literals_addr_h = upper_32_bits(lits_start); - sqe->stream_ctx_addr_l = lower_32_bits(ctx_buf); - sqe->stream_ctx_addr_h = upper_32_bits(ctx_buf); -} - static int lz77_zstd_buf_check(struct wd_comp_msg *msg) { __u32 in_size = msg->req.src_len; @@ -804,6 +805,7 @@ static int lz77_buf_check(struct wd_comp_msg *msg) static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { + struct hisi_comp_sqe_addr addr = {0}; struct wd_comp_req *req = &msg->req; struct wd_lz77_zstd_data *data = req->priv; __u32 in_size = msg->req.src_len; @@ -842,11 +844,15 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_size_lz77_zstd(sqe, in_size, lits_size, seq_avail_out); - fill_buf_addr_lz77_zstd(sqe, req->src, req->dst, req->dst + lits_size, ctx_buf); - data->literals_start = req->dst; data->sequences_start = req->dst + lits_size; + addr.src_addr = req->src; + addr.dst_addr = req->dst + lits_size; + addr.lit_addr = req->dst; + addr.ctx_addr = ctx_buf; + fill_buf_addr(sqe, &addr); + return 0; } @@ -965,6 +971,7 @@ static int lz77_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size) static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { + struct hisi_comp_sqe_addr addr = {0}; struct wd_comp_req *req = &msg->req; struct wd_lz77_zstd_data *data = req->priv; __u32 in_size = msg->req.src_len; @@ -1004,28 +1011,10 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(ret)) return ret; - fill_buf_addr_lz77_zstd(sqe, c_sgl.in, c_sgl.out, - c_sgl.out_seq, NULL); - - return 0; -} - -static int fill_buf_addr_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_datalist *list_src, - struct wd_datalist *list_dst) -{ - struct comp_sgl c_sgl; - int 
ret; - - c_sgl.list_src = list_src; - c_sgl.list_dst = list_dst; - c_sgl.seq_start = NULL; - - ret = get_sgl_from_pool(h_qp, &c_sgl); - if (unlikely(ret)) - return ret; - - fill_buf_addr_lz4(sqe, c_sgl.in, c_sgl.out); + addr.src_addr = c_sgl.in; + addr.dst_addr = c_sgl.out_seq; + addr.lit_addr = c_sgl.out; + fill_buf_addr(sqe, &addr); return 0; } @@ -1045,7 +1034,7 @@ static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); - return fill_buf_addr_lz4_sgl(h_qp, sqe, list_src, list_dst); + return fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); } static void fill_sqe_type_v1(struct hisi_zip_sqe *sqe) -- 2.33.0
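The merged helper leans on zero-initialization as its "field absent" encoding: fill_buf_addr() always programs the source and dest address words, but touches the literals and stream-context words only when the matching pointer in struct hisi_comp_sqe_addr is non-NULL. Condensed from the hunks above (error handling elided), a pbuffer deflate caller reduces to:

    struct hisi_comp_sqe_addr addr = {0};

    addr.src_addr = src;
    addr.dst_addr = dst;
    addr.ctx_addr = ctx_buf;   /* NULL for stateless requests */
    /* addr.lit_addr stays NULL, so the literals words are left untouched */
    fill_buf_addr(sqe, &addr);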
From: Chenghai Huang <huangchenghai2@huawei.com> The current UADK v2 supports the nosva feature, which allows business operations to be executed when the driver is loaded with uacce_mode=2. However, it requires using the reserved memory allocated through uacce. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 260 +++++++++++++++++++++++++------------- include/drv/wd_comp_drv.h | 13 ++ include/wd_comp.h | 2 + wd_comp.c | 43 ++++++- 4 files changed, 228 insertions(+), 90 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 7d6f94d..9d2a9af 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -77,6 +77,7 @@ #define max_in_data_size(outl) ((__u32)(((__u64)(outl) << 3) / 9) & 0xfffffffc) #define HZ_MAX_SIZE (8 * 1024 * 1024) +#define HW_CTX_SIZE 0x10000 #define RSV_OFFSET 64 #define CTX_DW1_OFFSET 4 @@ -244,15 +245,6 @@ struct hisi_zip_ctx { struct wd_ctx_config_internal config; }; -struct comp_sgl { - void *in; - void *out; - void *out_seq; - struct wd_datalist *list_src; - struct wd_datalist *list_dst; - struct wd_datalist *seq_start; -}; - static void dump_zip_msg(struct wd_comp_msg *msg) { WD_ERR("dump zip message after a task error occurs.\n"); @@ -376,11 +368,11 @@ static int check_enable_store_buf(struct wd_comp_msg *msg, __u32 out_size, int h return 0; } -static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl) +static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl, struct wd_mm_ops *mm_ops) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, mm_ops); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; @@ -415,6 +407,23 @@ err_free_sgl_in: return -WD_ENOMEM; } +static void free_hw_sgl(handle_t h_qp, struct comp_sgl *c_sgl, struct wd_mm_ops *mm_ops) +{ + handle_t h_sgl_pool; + + h_sgl_pool = hisi_qm_get_sglpool(h_qp, mm_ops); + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool!\n"); + return; + } + + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->in); + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->out); + + if (c_sgl->seq_start) + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->out_seq); +} + static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, __u32 out_size) { @@ -422,8 +431,124 @@ static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, sqe->dest_avail_out = out_size; } -static void fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *addr) +static int zip_mem_map(struct wd_mm_ops *mm_ops, struct hisi_zip_sqe *sqe, + struct hisi_comp_sqe_addr *addr) +{ + void *phy_dst, *phy_ctx, *mempool; + void *phy_lit = NULL; + void *phy_src = NULL; + + mempool = mm_ops->usr; + /* When the src len is 0, map is not required */ + if (sqe->input_data_length) { + phy_src = mm_ops->iova_map(mempool, addr->src_addr, sqe->input_data_length); + if (!phy_src) { + WD_ERR("get zip src dma address fail!\n"); + return -WD_ENOMEM; + } + + sqe->source_addr_l = lower_32_bits(phy_src); + sqe->source_addr_h = upper_32_bits(phy_src); + } + + if (addr->lit_addr) { + phy_lit = mm_ops->iova_map(mempool, addr->lit_addr, sqe->dw13); + if (!phy_lit) { + WD_ERR("get zip lits dma address fail!\n"); + goto unmap_src; + } + + sqe->literals_addr_l = lower_32_bits(phy_lit); + sqe->literals_addr_h = upper_32_bits(phy_lit); + } + + phy_dst = mm_ops->iova_map(mempool, addr->dst_addr, sqe->dest_avail_out); + if (!phy_dst) { + WD_ERR("get zip dst dma address fail!\n"); + goto unmap_lit; + } + 
sqe->dest_addr_l = lower_32_bits(phy_dst); + sqe->dest_addr_h = upper_32_bits(phy_dst); + + /* The source addr needs to be filled with a valid address when length is 0 */ + if (!sqe->input_data_length) { + sqe->source_addr_l = sqe->dest_addr_l; + sqe->source_addr_h = sqe->dest_addr_h; + } + + if (addr->ctx_addr) { + phy_ctx = mm_ops->iova_map(mempool, addr->ctx_addr, HW_CTX_SIZE); + if (!phy_ctx) { + WD_ERR("get zip ctx dma address fail!\n"); + goto unmap_dst; + } + + sqe->stream_ctx_addr_l = lower_32_bits(phy_ctx); + sqe->stream_ctx_addr_h = upper_32_bits(phy_ctx); + } + + return 0; + +unmap_dst: + mm_ops->iova_unmap(mempool, addr->dst_addr, phy_dst, sqe->dest_avail_out); +unmap_lit: + if (addr->lit_addr) + mm_ops->iova_unmap(mempool, addr->lit_addr, phy_lit, sqe->dw13); +unmap_src: + if (sqe->input_data_length) + mm_ops->iova_unmap(mempool, addr->src_addr, phy_src, sqe->input_data_length); + return -WD_ENOMEM; +} + +static void zip_mem_unmap(struct wd_comp_msg *msg, struct hisi_zip_sqe *sqe) +{ + void *dma_addr, *src_addr, *seq_addr, *lit_addr; + struct wd_mm_ops *mm_ops = msg->mm_ops; + struct wd_comp_req *req = &msg->req; + void *mempool = mm_ops->usr; + + if (msg->data_fmt == WD_SGL_BUF) + src_addr = msg->c_sgl.in; + else + src_addr = req->src; + + if (sqe->input_data_length) { + dma_addr = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); + mm_ops->iova_unmap(mempool, src_addr, dma_addr, sqe->input_data_length); + } + + if (msg->alg_type == WD_LZ77_ZSTD || msg->alg_type == WD_LZ77_ONLY) { + if (msg->data_fmt == WD_SGL_BUF) { + seq_addr = msg->c_sgl.out_seq; + lit_addr = msg->c_sgl.out; + } else { + seq_addr = req->dst + sqe->dw13; + lit_addr = req->dst; + } + + dma_addr = VA_ADDR(sqe->literals_addr_h, sqe->literals_addr_l); + mm_ops->iova_unmap(mempool, lit_addr, dma_addr, sqe->dw13); + dma_addr = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); + mm_ops->iova_unmap(mempool, seq_addr, dma_addr, sqe->dest_avail_out); + } else { + dma_addr = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); + mm_ops->iova_unmap(mempool, req->dst, dma_addr, sqe->dest_avail_out); + } + + if (msg->stream_mode == WD_COMP_STATEFUL) { + dma_addr = VA_ADDR(sqe->stream_ctx_addr_h, sqe->stream_ctx_addr_l); + mm_ops->iova_unmap(mempool, msg->ctx_buf, dma_addr, HW_CTX_SIZE); + } +} + +static int fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *addr, + struct wd_mm_ops *mm_ops) { + /* No-SVA mode and Memory is USER mode or PROXY mode */ + if (!mm_ops->sva_mode) + return zip_mem_map(mm_ops, sqe, addr); + sqe->source_addr_l = lower_32_bits(addr->src_addr); sqe->source_addr_h = upper_32_bits(addr->src_addr); sqe->dest_addr_l = lower_32_bits(addr->dst_addr); @@ -438,6 +563,8 @@ static void fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *a sqe->stream_ctx_addr_l = lower_32_bits(addr->ctx_addr); sqe->stream_ctx_addr_h = upper_32_bits(addr->ctx_addr); } + + return 0; } static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, @@ -497,15 +624,13 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, in_size, out_size); - if (msg->ctx_buf) + if (msg->stream_mode == WD_COMP_STATEFUL) ctx_buf = msg->ctx_buf + RSV_OFFSET; addr.src_addr = src; addr.dst_addr = dst; addr.ctx_addr = ctx_buf; - fill_buf_addr(sqe, &addr); - - return 0; + return fill_buf_addr(sqe, &addr, msg->mm_ops); } static int fill_buf_deflate(handle_t h_qp, struct hisi_zip_sqe *sqe, @@ -569,9 +694,7 @@ static int fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, addr.src_addr = src; 
addr.dst_addr = dst; - fill_buf_addr(sqe, &addr); - - return 0; + return fill_buf_addr(sqe, &addr, msg->mm_ops); } static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) @@ -584,25 +707,28 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) } static int fill_buf_addr_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_datalist *list_src, struct wd_datalist *list_dst) + struct comp_sgl *c_sgl, struct wd_mm_ops *mm_ops) { struct hisi_comp_sqe_addr addr = {0}; - struct comp_sgl c_sgl; int ret; - c_sgl.list_src = list_src; - c_sgl.list_dst = list_dst; - c_sgl.seq_start = NULL; - - ret = get_sgl_from_pool(h_qp, &c_sgl); + ret = get_sgl_from_pool(h_qp, c_sgl, mm_ops); if (unlikely(ret)) return ret; - addr.src_addr = c_sgl.in; - addr.dst_addr = c_sgl.out; - fill_buf_addr(sqe, &addr); + addr.src_addr = c_sgl->in; + if (c_sgl->seq_start) { + addr.lit_addr = c_sgl->out; + addr.dst_addr = c_sgl->out_seq; + } else { + addr.dst_addr = c_sgl->out; + } + + ret = fill_buf_addr(sqe, &addr, mm_ops); + if (unlikely(ret)) + free_hw_sgl(h_qp, c_sgl, mm_ops); - return 0; + return ret; } static void fill_buf_sgl_skip(struct hisi_zip_sqe *sqe, __u32 src_skip, @@ -624,7 +750,6 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, int head_size) { struct wd_comp_req *req = &msg->req; - struct wd_datalist *list_src = req->list_src; struct wd_datalist *list_dst = req->list_dst; __u32 out_size = msg->avail_out; __u32 in_size = req->src_len; @@ -654,10 +779,6 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_type_sgl(sqe); - ret = fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); - if (unlikely(ret)) - return ret; - if (head != NULL && msg->req.op_type == WD_DIR_COMPRESS) { memcpy(req->list_dst->data, head, head_size); dst_skip = head_size; @@ -681,7 +802,10 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, in_size, out_size); - return 0; + msg->c_sgl.list_src = req->list_src; + msg->c_sgl.list_dst = list_dst; + msg->c_sgl.seq_start = NULL; + return fill_buf_addr_sgl(h_qp, sqe, &msg->c_sgl, msg->mm_ops); } static int fill_buf_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, @@ -826,7 +950,7 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(seq_avail_out > HZ_MAX_SIZE)) seq_avail_out = HZ_MAX_SIZE; - if (msg->ctx_buf) { + if (msg->stream_mode == WD_COMP_STATEFUL) { ctx_buf = msg->ctx_buf + RSV_OFFSET; if (msg->alg_type == WD_LZ77_ZSTD) { if (data->blk_type != COMP_BLK) @@ -851,9 +975,7 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, addr.dst_addr = req->dst + lits_size; addr.lit_addr = req->dst; addr.ctx_addr = ctx_buf; - fill_buf_addr(sqe, &addr); - - return 0; + return fill_buf_addr(sqe, &addr, msg->mm_ops); } static struct wd_datalist *get_seq_start_list(struct wd_comp_req *req) @@ -977,7 +1099,6 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, __u32 in_size = msg->req.src_len; __u32 out_size = msg->avail_out; struct wd_datalist *seq_start; - struct comp_sgl c_sgl; __u32 lits_size; int ret; @@ -1003,27 +1124,15 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size); - c_sgl.list_src = req->list_src; - c_sgl.list_dst = req->list_dst; - c_sgl.seq_start = seq_start; - - ret = get_sgl_from_pool(h_qp, &c_sgl); - if (unlikely(ret)) - return ret; - - addr.src_addr = c_sgl.in; - addr.dst_addr = 
c_sgl.out_seq; - addr.lit_addr = c_sgl.out; - fill_buf_addr(sqe, &addr); - - return 0; + msg->c_sgl.list_src = req->list_src; + msg->c_sgl.list_dst = req->list_dst; + msg->c_sgl.seq_start = seq_start; + return fill_buf_addr_sgl(h_qp, sqe, &msg->c_sgl, msg->mm_ops); } static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { - struct wd_datalist *list_src = msg->req.list_src; - struct wd_datalist *list_dst = msg->req.list_dst; int ret; ret = check_lz4_msg(msg, WD_SGL_BUF); @@ -1034,7 +1143,10 @@ static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); - return fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); + msg->c_sgl.list_src = msg->req.list_src; + msg->c_sgl.list_dst = msg->req.list_dst; + msg->c_sgl.seq_start = NULL; + return fill_buf_addr_sgl(h_qp, sqe, &msg->c_sgl, msg->mm_ops); } static void fill_sqe_type_v1(struct hisi_zip_sqe *sqe) @@ -1419,31 +1531,6 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg, return 0; } -static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - enum wd_comp_alg_type alg_type) -{ - void *hw_sgl_in, *hw_sgl_out; - handle_t h_sgl_pool; - - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); - if (unlikely(!h_sgl_pool)) { - WD_ERR("failed to get sglpool to free hw sgl!\n"); - return; - } - - hw_sgl_in = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); - - hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); - - if (alg_type == WD_LZ77_ZSTD || alg_type == WD_LZ77_ONLY) { - hw_sgl_out = VA_ADDR(sqe->literals_addr_h, - sqe->literals_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); - } -} - static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *comp_msg) { struct hisi_qp *qp = wd_ctx_get_priv(ctx); @@ -1467,7 +1554,7 @@ static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *com ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (unlikely(ret < 0)) { if (msg->req.data_fmt == WD_SGL_BUF) - free_hw_sgl(h_qp, &sqe, msg->alg_type); + free_hw_sgl(h_qp, &msg->c_sgl, msg->mm_ops); if (ret != -WD_EBUSY) WD_ERR("failed to send to hardware, ret = %d!\n", ret); @@ -1619,8 +1706,11 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, recv_msg->checksum = sqe->dw31; recv_msg->alg_type = alg_type; + if (recv_msg->mm_ops && !recv_msg->mm_ops->sva_mode) + zip_mem_unmap(recv_msg, sqe); + if (buf_type == WD_SGL_BUF) - free_hw_sgl((handle_t)qp, sqe, alg_type); + free_hw_sgl((handle_t)qp, &recv_msg->c_sgl, recv_msg->mm_ops); if (unlikely(recv_msg->req.status == WD_IN_EPARA)) dump_zip_msg(recv_msg); diff --git a/include/drv/wd_comp_drv.h b/include/drv/wd_comp_drv.h index 213cf2d..95f7fb7 100644 --- a/include/drv/wd_comp_drv.h +++ b/include/drv/wd_comp_drv.h @@ -24,9 +24,22 @@ enum wd_comp_state { WD_COMP_STATELESS, }; +struct comp_sgl { + void *in; + void *out; + void *out_seq; + struct wd_datalist *list_src; + struct wd_datalist *list_dst; + struct wd_datalist *seq_start; +}; + /* fixme wd_comp_msg */ struct wd_comp_msg { struct wd_comp_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; + /* Store sgl addr for nosva */ + struct comp_sgl c_sgl; /* Denoted HW ctx cache, for stream mode */ void *ctx_buf; /* Denoted by enum wd_comp_alg_type */ diff --git a/include/wd_comp.h b/include/wd_comp.h index 8e056d1..5d09536 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ 
-151,6 +151,8 @@ struct wd_comp_sess_setup { enum wd_comp_winsz_type win_sz; /* Denoted by enum wd_comp_winsz_type */ enum wd_comp_op_type op_type; /* Denoted by enum wd_comp_op_type */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; /** diff --git a/wd_comp.c b/wd_comp.c index 5e62462..c67b7f1 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -15,7 +15,7 @@ #include "drv/wd_comp_drv.h" #include "wd_comp.h" -#define HW_CTX_SIZE (64 * 1024) +#define HW_CTX_SIZE 0x10000 #define STREAM_CHUNK (128 * 1024) #define WD_ZLIB_HEADER_SZ 2 #define WD_GZIP_HEADER_SZ 10 @@ -41,6 +41,8 @@ struct wd_comp_sess { __u32 checksum; __u8 *ctx_buf; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_comp_setting { @@ -437,6 +439,24 @@ static int wd_comp_check_sess_params(struct wd_comp_sess_setup *setup) return WD_SUCCESS; } +static int wd_alloc_ctx_buf(struct wd_mm_ops *mm_ops, struct wd_comp_sess *sess) +{ + + sess->ctx_buf = mm_ops->alloc(mm_ops->usr, HW_CTX_SIZE); + if (!sess->ctx_buf) + return -WD_ENOMEM; + + memset(sess->ctx_buf, 0, HW_CTX_SIZE); + + return WD_SUCCESS; +} + +static void wd_free_ctx_buf(struct wd_mm_ops *mm_ops, struct wd_comp_sess *sess) +{ + mm_ops->free(mm_ops->usr, sess->ctx_buf); + sess->ctx_buf = NULL; +} + handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) { struct wd_comp_sess *sess; @@ -453,8 +473,15 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) if (!sess) return (handle_t)0; - sess->ctx_buf = calloc(1, HW_CTX_SIZE); - if (!sess->ctx_buf) + /* Memory type set */ + ret = wd_mem_ops_init(wd_comp_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + goto sess_err; + } + + ret = wd_alloc_ctx_buf(&setup->mm_ops, sess); + if (ret) goto sess_err; sess->alg_type = setup->alg_type; @@ -462,6 +489,9 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW; + sess->mm_type = setup->mm_type; + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param); @@ -473,7 +503,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) return (handle_t)sess; sched_err: - free(sess->ctx_buf); + wd_free_ctx_buf(&setup->mm_ops, sess); sess_err: free(sess); return (handle_t)0; @@ -487,7 +517,7 @@ void wd_comp_free_sess(handle_t h_sess) return; if (sess->ctx_buf) - free(sess->ctx_buf); + wd_free_ctx_buf(&sess->mm_ops, sess); if (sess->sched_key) free(sess->sched_key); @@ -522,6 +552,9 @@ static void fill_comp_msg(struct wd_comp_sess *sess, struct wd_comp_msg *msg, msg->win_sz = sess->win_sz; msg->avail_out = req->dst_len; + msg->mm_type = sess->mm_type; + msg->mm_ops = &sess->mm_ops; + msg->req.last = 1; } -- 2.33.0
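Seen from the caller, the new contract is: create a reserved-memory pool, point a wd_mm_ops at the pool helpers, and pass both through wd_comp_sess_setup. wd_comp_alloc_sess() then draws the stream ctx_buf from the pool, and in non-SVA mode the driver translates every buffer through iova_map()/iova_unmap() around each request. A minimal sketch, assuming rsv_pool was created beforehand (e.g. with wd_mempool_alloc()) and mem_type holds a non-auto memory type; the alg/op values are illustrative and error handling is elided:

    struct wd_comp_sess_setup setup = {0};
    handle_t h_sess;

    setup.alg_type = WD_LZ77_ZSTD;
    setup.op_type = WD_DIR_COMPRESS;
    setup.mm_type = mem_type;                  /* non-auto selects the nosva path */
    setup.mm_ops.alloc = (void *)wd_mem_alloc;
    setup.mm_ops.free = (void *)wd_mem_free;
    setup.mm_ops.iova_map = (void *)wd_mem_map;
    setup.mm_ops.iova_unmap = (void *)wd_mem_unmap;
    setup.mm_ops.usr = rsv_pool;               /* pool handle consumed by the ops */

    h_sess = wd_comp_alloc_sess(&setup);

Request buffers (src, dst and the stream context) must come from the same pool, since zip_mem_map() resolves each address with mm_ops->iova_map(mm_ops->usr, ...).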
From: Chenghai Huang <huangchenghai2@huawei.com> We can test nosva performance using UADK v2 by specifying --memory 2; the --init2 initialization method is also supported. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/zip_uadk_benchmark.c | 350 ++++++++++++++++++++--- uadk_tool/benchmark/zip_wd_benchmark.c | 20 +- 2 files changed, 315 insertions(+), 55 deletions(-) diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index 030f9bf..fc81c2b 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -6,10 +6,13 @@ #include "zip_uadk_benchmark.h" #include "include/wd_comp.h" #include "include/wd_sched.h" +#include "include/wd_bmm.h" #include "include/fse.h" #define ZIP_TST_PRT printf #define PATH_SIZE 64 +#define ALIGN_SIZE 64 +#define BLOCK_NUM 64 #define ZIP_FILE "./zip" #define COMP_LEN_RATE 2 #define DECOMP_LEN_RATE 2 @@ -30,6 +33,7 @@ struct bd_pool { struct thread_pool { struct bd_pool *pool; + void *rsv_pool; } g_zip_pool; enum ZIP_OP_MODE { @@ -62,6 +66,7 @@ typedef struct uadk_thread_res { struct zip_async_tag *tag; COMP_TUPLE_TAG *ftuple; char *hw_buff_out; + int mm_type; } thread_data; struct zip_file_head { @@ -78,6 +83,7 @@ static unsigned int g_ctxnum; static unsigned int g_pktlen; static unsigned int g_prefetch; static unsigned int g_state; +static unsigned int g_dev_id; #ifndef ZLIB_FSE static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level) @@ -348,7 +354,10 @@ static int init_ctx_config2(struct acc_option *options) } /* init */ - ret = wd_comp_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_comp_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + ret = wd_comp_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); if (ret) { ZIP_TST_PRT("failed to do comp init2!\n"); return ret; @@ -419,6 +428,7 @@ static int specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = options->optype % WD_DIR_MAX; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); wd_free_list_accels(list); return 0; @@ -461,6 +471,7 @@ static int non_specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = options->optype % WD_DIR_MAX; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); free(dev); } @@ -502,7 +513,10 @@ static int init_ctx_config(struct acc_option *options) goto free_ctxs; } - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, 2, max_node, wd_comp_poll_ctx); if (!g_sched) { ZIP_TST_PRT("failed to alloc sched!\n"); ret = -ENOMEM; @@ -520,6 +534,7 @@ param.mode = mode; param.begin = 0; param.end = g_ctxnum - 1; + param.dev_id = g_dev_id; ret = wd_sched_rr_instance(g_sched, &param); if (ret) { ZIP_TST_PRT("failed to fill sched data!\n"); @@ -576,33 +591,34 @@ static int init_uadk_bd_pool(u32 optype) outsize = g_pktlen * DECOMP_LEN_RATE; } - g_zip_pool.pool = malloc(g_thread_num * sizeof(struct bd_pool)); + g_zip_pool.pool = calloc(1, g_thread_num * sizeof(struct bd_pool)); if (!g_zip_pool.pool) { ZIP_TST_PRT("init uadk pool alloc thread failed!\n"); return -ENOMEM; - } else { - for (i = 0; i 
< g_thread_num; i++) { - g_zip_pool.pool[i].bds = malloc(MAX_POOL_LENTH_COMP * - sizeof(struct uadk_bd)); - if (!g_zip_pool.pool[i].bds) { - ZIP_TST_PRT("init uadk bds alloc failed!\n"); - goto malloc_error1; - } - for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - g_zip_pool.pool[i].bds[j].src = calloc(1, insize); - if (!g_zip_pool.pool[i].bds[j].src) - goto malloc_error2; - g_zip_pool.pool[i].bds[j].src_len = insize; - - g_zip_pool.pool[i].bds[j].dst = malloc(outsize); - if (!g_zip_pool.pool[i].bds[j].dst) - goto malloc_error3; - g_zip_pool.pool[i].bds[j].dst_len = outsize; - - get_rand_data(g_zip_pool.pool[i].bds[j].src, insize * COMPRESSION_RATIO_FACTOR); - if (g_prefetch) - get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); - } + } + + for (i = 0; i < g_thread_num; i++) { + g_zip_pool.pool[i].bds = calloc(1, MAX_POOL_LENTH_COMP * + sizeof(struct uadk_bd)); + if (!g_zip_pool.pool[i].bds) { + ZIP_TST_PRT("init uadk bds alloc failed!\n"); + goto malloc_error1; + } + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + g_zip_pool.pool[i].bds[j].src = calloc(1, insize); + if (!g_zip_pool.pool[i].bds[j].src) + goto malloc_error2; + g_zip_pool.pool[i].bds[j].src_len = insize; + + g_zip_pool.pool[i].bds[j].dst = malloc(outsize); + if (!g_zip_pool.pool[i].bds[j].dst) + goto malloc_error3; + g_zip_pool.pool[i].bds[j].dst_len = outsize; + + get_rand_data(g_zip_pool.pool[i].bds[j].src, + insize * COMPRESSION_RATIO_FACTOR); + if (g_prefetch) + get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); } } @@ -649,6 +665,131 @@ static void free_uadk_bd_pool(void) g_zip_pool.pool = NULL; } +static int init_uadk_rsv_pool(struct acc_option *option) +{ + struct wd_mempool_setup pool_setup; + char *alg = option->algclass; + u32 insize = g_pktlen; + handle_t h_ctx; + u32 outsize; + int i, j; + + h_ctx = wd_find_ctx(alg); + if (!h_ctx) { + ZIP_TST_PRT("failed to find a ctx for alg: %s\n", option->algname); + return -EINVAL; + } + g_ctx_cfg.priv = (void *)h_ctx; + + if (option->algtype != LZ77_ZSTD) + outsize = g_pktlen + ALIGN_SIZE; + else + outsize = g_pktlen * DECOMP_LEN_RATE; + + pool_setup.block_size = outsize > CHUNK_SIZE ? 
outsize : CHUNK_SIZE; + pool_setup.block_num = g_thread_num * MAX_POOL_LENTH_COMP * BLOCK_NUM; + pool_setup.align_size = ALIGN_SIZE; + pool_setup.ops.alloc = NULL; + pool_setup.ops.free = NULL; + g_zip_pool.rsv_pool = wd_mempool_alloc(h_ctx, &pool_setup); + if (!g_zip_pool.rsv_pool) { + ZIP_TST_PRT("failed to create block pool\n"); + return -ENOMEM; + } + + pool_setup.ops.alloc = (void *)wd_mem_alloc; + pool_setup.ops.free = (void *)wd_mem_free; + pool_setup.ops.iova_map = (void *)wd_mem_map; + pool_setup.ops.iova_unmap = (void *)wd_mem_unmap; + pool_setup.ops.get_bufsize = (void *)wd_get_bufsize; + pool_setup.ops.usr = g_zip_pool.rsv_pool; + + g_zip_pool.pool = calloc(1, g_thread_num * sizeof(struct bd_pool)); + if (!g_zip_pool.pool) { + ZIP_TST_PRT("init uadk pool alloc thread failed!\n"); + goto free_pool; + } + + for (i = 0; i < g_thread_num; i++) { + g_zip_pool.pool[i].bds = calloc(1, MAX_POOL_LENTH_COMP * sizeof(struct uadk_bd)); + if (!g_zip_pool.pool[i].bds) { + ZIP_TST_PRT("init uadk bds alloc failed!\n"); + goto malloc_error1; + } + + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + g_zip_pool.pool[i].bds[j].src = wd_mem_alloc(g_zip_pool.rsv_pool, insize); + if (!g_zip_pool.pool[i].bds[j].src) { + ZIP_TST_PRT("Failed to alloc src block\n"); + goto malloc_error2; + } + g_zip_pool.pool[i].bds[j].src_len = insize; + + g_zip_pool.pool[i].bds[j].dst = wd_mem_alloc(g_zip_pool.rsv_pool, outsize); + if (!g_zip_pool.pool[i].bds[j].dst) { + ZIP_TST_PRT("Failed to alloc dst block\n"); + goto malloc_error3; + } + g_zip_pool.pool[i].bds[j].dst_len = outsize; + + get_rand_data(g_zip_pool.pool[i].bds[j].src, insize * COMPRESSION_RATIO_FACTOR); + if (g_prefetch) + get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); + } + } + + return 0; + +malloc_error3: + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + +malloc_error2: + for (j--; j >= 0; j--) { + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].dst); + } +malloc_error1: + for (i--; i >= 0; i--) { + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].dst); + } + free(g_zip_pool.pool[i].bds); + g_zip_pool.pool[i].bds = NULL; + } + free(g_zip_pool.pool); + g_zip_pool.pool = NULL; + +free_pool: + wd_mempool_free(h_ctx, g_zip_pool.rsv_pool); + g_zip_pool.rsv_pool = NULL; + + ZIP_TST_PRT("init uadk bd pool alloc failed!\n"); + return -ENOMEM; +} + +static void free_uadk_rsv_pool(struct acc_option *option) +{ + handle_t h_ctx = (handle_t)g_ctx_cfg.priv; + int i, j; + + for (i = 0; i < g_thread_num; i++) { + if (g_zip_pool.pool[i].bds) { + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].dst); + } + } + free(g_zip_pool.pool[i].bds); + g_zip_pool.pool[i].bds = NULL; + } + free(g_zip_pool.pool); + g_zip_pool.pool = NULL; + + wd_mempool_free(h_ctx, g_zip_pool.rsv_pool); + g_zip_pool.rsv_pool = NULL; +} + /*-------------------------------uadk benchmark main code-------------------------------------*/ static void *zip_lz77_async_cb(struct wd_comp_req *req, void *data) { @@ -748,6 +889,7 @@ static void *zip_uadk_blk_lz77_sync_run(void *arg) thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; ZSTD_CCtx *cctx = zstd_soft_fse_init(15); + struct sched_params sc_param = {0}; ZSTD_inBuffer 
zstd_input = {0}; ZSTD_outBuffer zstd_output = {0}; COMP_TUPLE_TAG *ftuple = NULL; @@ -768,11 +910,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -789,7 +945,10 @@ if (!ftuple) goto fse_err; - hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + if (pdata->mm_type == UADK_MEM_AUTO) + hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + else + hw_buff_out = wd_mem_alloc(g_zip_pool.rsv_pool, out_len * MAX_POOL_LENTH_COMP); if (!hw_buff_out) goto hw_buff_err; memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP); @@ -823,7 +982,10 @@ } hw_buff_err: - free(hw_buff_out); + if (pdata->mm_type == UADK_MEM_AUTO) + free(hw_buff_out); + else + wd_mem_free(g_zip_pool.rsv_pool, hw_buff_out); fse_err: free(ftuple); wd_comp_free_sess(h_sess); @@ -841,6 +1003,7 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; COMP_TUPLE_TAG *ftuple = NULL; struct bd_pool *uadk_pool; struct wd_comp_req creq; @@ -858,11 +1021,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -887,11 +1064,16 @@ while (in_len > 0) { creq.src_len = in_len > CHUNK_SIZE ? CHUNK_SIZE : in_len; - creq.dst_len = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len; + creq.dst_len = out_len > creq.src_len * 2 ? 
creq.src_len * 2 : out_len; creq.src = src; creq.dst = dst; creq.priv = &ftuple[i]; + if (creq.op_type == WD_DIR_COMPRESS) { + if (in_len <= CHUNK_SIZE) + creq.last = 1; + } + ret = wd_do_comp_strm(h_sess, &creq); if (ret < 0 || creq.status == WD_IN_EPARA) { ZIP_TST_PRT("wd comp, invalid or incomplete data! " @@ -899,12 +1081,13 @@ break; } - src += CHUNK_SIZE; - in_len -= CHUNK_SIZE; - dst += 2 * CHUNK_SIZE; - out_len -= 2 * CHUNK_SIZE; + src += creq.src_len; + in_len -= creq.src_len; + dst += creq.dst_len; + out_len -= creq.dst_len; } + wd_comp_reset_sess(h_sess); count++; if (get_run_state() == 0) @@ -926,6 +1109,7 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; ZSTD_CCtx *cctx = zstd_soft_fse_init(15); + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -941,11 +1125,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1001,6 +1199,7 @@ static void *zip_uadk_blk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1015,11 +1214,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1061,6 +1274,7 @@ static void *zip_uadk_stm_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1075,11 +1289,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + 
sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1125,6 +1353,7 @@ static void *zip_uadk_blk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1140,11 +1369,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1228,6 +1471,7 @@ threads_args[i].optype = threads_option.optype; threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; + threads_args[i].mm_type = options->mem_type; threads_args[i].td_id = i; ret = pthread_create(&tdid[i], NULL, uadk_zip_sync_run, &threads_args[i]); if (ret) { @@ -1293,6 +1537,7 @@ threads_args[i].optype = threads_option.optype; threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; + threads_args[i].mm_type = options->mem_type; threads_args[i].td_id = i; if (threads_option.alg == WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) { struct bd_pool *uadk_pool = &g_zip_pool.pool[i]; @@ -1305,11 +1550,15 @@ goto lz77_free; } - threads_args[i].hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + if (options->mem_type == UADK_MEM_AUTO) + threads_args[i].hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + else + threads_args[i].hw_buff_out = wd_mem_alloc(g_zip_pool.rsv_pool, out_len * MAX_POOL_LENTH_COMP); if (!threads_args[i].hw_buff_out) { ZIP_TST_PRT("failed to malloc lz77 hw_buff_out!\n"); goto lz77_free; } + memset(threads_args[i].hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP); } threads_args[i].tag = malloc(sizeof(struct zip_async_tag) * MAX_POOL_LENTH_COMP); @@ -1362,8 +1611,93 @@ lz77_free: if 
(threads_args[i].ftuple) free(threads_args[i].ftuple); - if (threads_args[i].hw_buff_out) - free(threads_args[i].hw_buff_out); + if (threads_args[i].hw_buff_out) { + if (options->mem_type == UADK_MEM_AUTO) + free(threads_args[i].hw_buff_out); + else + wd_mem_free(g_zip_pool.rsv_pool, + threads_args[i].hw_buff_out); + } } } async_error: @@ -1393,7 +1647,11 @@ int zip_uadk_benchmark(struct acc_option *options) if (ret) return ret; - ret = init_uadk_bd_pool(options->optype); + if (options->mem_type == UADK_MEM_AUTO) + ret = init_uadk_bd_pool(options->optype); + else + ret = init_uadk_rsv_pool(options); + if (ret) return ret; @@ -1415,7 +1673,11 @@ int zip_uadk_benchmark(struct acc_option *options) if (ret) return ret; - free_uadk_bd_pool(); + if (options->mem_type == UADK_MEM_AUTO) + free_uadk_bd_pool(); + else + free_uadk_rsv_pool(options); + if (options->inittype == INIT2_TYPE) uninit_ctx_config2(); else diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index fd9dcbc..8388fd0 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -16,7 +16,6 @@ #define WCRYPTO_DIR_MAX (WCRYPTO_INFLATE + 1) #define ALIGN_SIZE 64 -#define COMP_LEN_RATE 2 #define DECOMP_LEN_RATE 2 #define COMPRESSION_RATIO_FACTOR 0.7 #define MAX_POOL_LENTH_COMP 512 @@ -311,21 +310,18 @@ static int init_zip_wd_queue(struct acc_option *options) { struct wd_blkpool_setup blksetup; struct wd_bd *bds = NULL; + u32 insize = g_pktlen; void *pool = NULL; u32 outsize; - u32 insize; u8 op_type; int i, j, k; int ret = 0; op_type = options->optype % WCRYPTO_DIR_MAX; - if (op_type == WCRYPTO_DEFLATE) {//compress - insize = g_pktlen; - outsize = g_pktlen * COMP_LEN_RATE; - } else { // decompress - insize = g_pktlen; + if (options->algtype != LZ77_ZSTD) + outsize = g_pktlen + ALIGN_SIZE; + else outsize = g_pktlen * DECOMP_LEN_RATE; - } g_thread_queue.bd_res = malloc(g_thread_num * sizeof(struct thread_bd_res)); if (!g_thread_queue.bd_res) { @@ -873,7 +869,7 @@ static void *zip_wd_blk_sync_run(void *arg) static void *zip_wd_stm_sync_run(void *arg) { - u32 in_len, out_len, total_out, count = 0; + u32 in_len, out_len, total_out, total_avail_out; thread_data *pdata = (thread_data *)arg; struct wcrypto_comp_ctx_setup comp_setup; struct wcrypto_comp_op_data opdata; @@ -881,6 +877,7 @@ static void *zip_wd_stm_sync_run(void *arg) struct wd_queue *queue; struct wd_bd *bd_pool; void *src, *dst; + u32 count = 0; int ret, i; if (pdata->td_id > g_thread_num) @@ -911,13 +908,14 @@ static void *zip_wd_stm_sync_run(void *arg) opdata.alg_type = pdata->alg; opdata.priv = NULL; opdata.status = 0; + total_avail_out = bd_pool[0].dst_len; while(1) { i = count % MAX_POOL_LENTH_COMP; src = bd_pool[i].src; dst = bd_pool[i].dst; in_len = bd_pool[i].src_len; - out_len = g_pktlen * DECOMP_LEN_RATE; + out_len = total_avail_out; total_out = 0; opdata.stream_pos = WCRYPTO_COMP_STREAM_NEW; @@ -926,7 +924,7 @@ static void *zip_wd_stm_sync_run(void *arg) opdata.avail_out = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len; opdata.in = src; opdata.out = dst; - opdata.flush = in_len ? WCRYPTO_SYNC_FLUSH : WCRYPTO_FINISH; + opdata.flush = in_len > CHUNK_SIZE ? WCRYPTO_SYNC_FLUSH : WCRYPTO_FINISH; ret = wcrypto_do_comp(ctx, &opdata, NULL); if (ret || opdata.status == WCRYPTO_DECOMP_END_NOSPACE || -- 2.33.0
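On sizing: the reserved pool is provisioned with block_num = g_thread_num * MAX_POOL_LENTH_COMP * BLOCK_NUM blocks of at least CHUNK_SIZE bytes each, so with a pool length of 512 (the value the v1 benchmark uses; assumed to match here), 4 threads already request 4 * 512 * 64 = 131072 blocks, and the uacce reserved region must be sized to match. An illustrative invocation of the new path; only --memory 2 and --init2 come from this patch, while the uadk_tool benchmark entry point and the remaining options are assumed or elided:

    uadk_tool benchmark ... --memory 2 --init2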
From: Junchong Pan <panjunchong@h-partners.com> Add --sgl operations for sec and zip. Signed-off-by: Junchong Pan <panjunchong@h-partners.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/sec_uadk_benchmark.c | 205 +++++++++++++++++++---- uadk_tool/benchmark/uadk_benchmark.c | 13 ++ uadk_tool/benchmark/uadk_benchmark.h | 3 + uadk_tool/benchmark/zip_uadk_benchmark.c | 204 ++++++++++++++++++---- 4 files changed, 359 insertions(+), 66 deletions(-) diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index 141b161..4a64c94 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -72,6 +72,7 @@ static unsigned int g_algtype; static unsigned int g_optype; static unsigned int g_maclen; static unsigned int g_dev_id; +static unsigned int g_data_fmt; struct aead_alg_info { int index; @@ -970,6 +971,9 @@ static void save_aead_dst_data(u8 *addr, u32 size) static void read_aead_dst_data(u8 *addr, u32 len) { char file_name[SEC_SAVE_FILE_LEN] = {0}; + struct wd_datalist *current; + size_t bytes_to_read = 0; + size_t bytes_read = 0; char *alg_name; FILE *fp; int size; @@ -992,10 +996,29 @@ static void read_aead_dst_data(u8 *addr, u32 len) size = ftell(fp); rewind(fp); - size = fread(addr, 1, size, fp); - addr[size] = '\0'; - memcpy(g_save_mac, (char *)addr + len, SEC_MAX_MAC_LEN); + if (!g_data_fmt) { + size = fread(addr, 1, size, fp); + addr[size] = '\0'; + + memcpy(g_save_mac, (char *)addr + len, SEC_MAX_MAC_LEN); + } else { + current = (struct wd_datalist *)addr; + while (current && size > 0) { + bytes_to_read = current->len; + if (bytes_to_read > size) + bytes_to_read = size; + bytes_read = fread(current->data, 1, bytes_to_read, fp); + + if (bytes_read != bytes_to_read) { + SEC_TST_PRT("partial read: expected %zu, got %zu\n", bytes_to_read, bytes_read); + fclose(fp); + return; + } + size -= bytes_read; + current = current->next; + } + } fclose(fp); } @@ -1068,14 +1091,114 @@ static void free_ivkey_source(void) free(g_uadk_pool.iv); } +/* + * Calculate SGL unit size. + */ +static inline size_t cal_unit_sz(size_t sz) +{ + return (sz + SGL_ALIGNED_BYTES - 1) & ~(SGL_ALIGNED_BYTES - 1); +} + +/* + * Create SGL or common memory buffer. + */ +static void *create_buf(int sgl, size_t sz, size_t unit_sz) +{ + struct wd_datalist *head, *p, *q; + int i, tail_sz, sgl_num; + void *buf; + + buf = malloc(sz); + if (!buf) { + SEC_TST_PRT("Fail to allocate buffer %ld size!\n", sz); + return NULL; + } + + memset_buf(buf, sz); + + if (sgl == WD_FLAT_BUF) + return buf; + + if (g_alg != AEAD_TYPE) { + get_rand_data(buf, g_pktlen); + } else { + if (!g_optype) + get_aead_data(buf, g_pktlen + SEC_AEAD_LEN); + } + + tail_sz = sz % unit_sz; + sgl_num = sz / unit_sz; /* the number with unit_sz bytes */ + + /* the additional slot is for tail_sz */ + head = calloc(sgl_num + (tail_sz ? 
1 : 0), sizeof(struct wd_datalist)); + if (!head) { + SEC_TST_PRT("Fail to allocate memory for SGL head!\n"); + goto out; + } + + q = NULL; + for (i = 0; i < sgl_num; i++) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = unit_sz; + if (q) + q->next = p; + q = p; + } + + if (tail_sz) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = tail_sz; + if (q) + q->next = p; + q = p; + } + + if (q) + q->next = NULL; + + return head; +out: + free(buf); + return NULL; +} + +static void free_buf(int sgl, void *buf) +{ + struct wd_datalist *head = buf; + struct wd_datalist *p = head; + void *data_buf = NULL; + + if (!buf) + return; + + if (sgl == WD_FLAT_BUF) { + free(buf); + return; + } + + if (head) + data_buf = head->data; + + /* free the whole data buffer of SGL */ + if (data_buf) + free(p->data); + + /* free SGL headers */ + free(buf); +} + static int init_uadk_bd_pool(void) { unsigned long step; + int unit_sz; int i, j; int ret; // make the block not align to 4K step = sizeof(char) * g_pktlen * 2; + unit_sz = cal_unit_sz(step); ret = init_ivkey_source(); if (ret) { @@ -1097,12 +1220,13 @@ static int init_uadk_bd_pool(void) goto malloc_error1; } for (j = 0; j < MAX_POOL_LENTH; j++) { - g_uadk_pool.pool[i].bds[j].src = malloc(step); - memset(g_uadk_pool.pool[i].bds[j].src, 0, step); + g_uadk_pool.pool[i].bds[j].src = create_buf(g_data_fmt, step, unit_sz); if (!g_uadk_pool.pool[i].bds[j].src) goto malloc_error2; - g_uadk_pool.pool[i].bds[j].dst = malloc(step); - memset(g_uadk_pool.pool[i].bds[j].dst, 0, step); + if (g_alg == DIGEST_TYPE) + g_uadk_pool.pool[i].bds[j].dst = malloc(step); + else + g_uadk_pool.pool[i].bds[j].dst = create_buf(g_data_fmt, step, unit_sz); if (!g_uadk_pool.pool[i].bds[j].dst) goto malloc_error3; g_uadk_pool.pool[i].bds[j].mac = malloc(SEC_MAX_MAC_LEN); @@ -1110,19 +1234,29 @@ static int init_uadk_bd_pool(void) if (!g_uadk_pool.pool[i].bds[j].mac) goto malloc_error4; - if (g_alg != AEAD_TYPE) { - get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); - if (g_prefetch) - get_rand_data(g_uadk_pool.pool[i].bds[j].dst, - g_pktlen); - } else { - if (!g_optype) - get_aead_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); - else { + if (g_data_fmt == WD_FLAT_BUF) { + if (g_alg != AEAD_TYPE) { + get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); + if (g_prefetch) + get_rand_data(g_uadk_pool.pool[i].bds[j].dst, + g_pktlen); + } else { + if (!g_optype) + get_aead_data(g_uadk_pool.pool[i].bds[j].src, + g_pktlen + SEC_AEAD_LEN); + else { + read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, + g_pktlen + SEC_AEAD_LEN); + memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); + } + } + } else if (g_data_fmt == WD_SGL_BUF) { + if (g_optype && g_alg == AEAD_TYPE) { read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); + g_pktlen + SEC_AEAD_LEN); memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); + } else if (g_prefetch && g_alg == DIGEST_TYPE) { + get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); } } } @@ -1132,20 +1266,23 @@ static int init_uadk_bd_pool(void) return 0; malloc_error4: - free(g_uadk_pool.pool[i].bds[j].dst); + if (g_alg == DIGEST_TYPE) + free(g_uadk_pool.pool[i].bds[j].dst); + else + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); malloc_error3: - free(g_uadk_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); malloc_error2: for (j--; j >= 0; j--) { - free(g_uadk_pool.pool[i].bds[j].src); - free(g_uadk_pool.pool[i].bds[j].dst); + 
free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); free(g_uadk_pool.pool[i].bds[j].mac); } malloc_error1: for (i--; i >= 0; i--) { for (j = 0; j < MAX_POOL_LENTH; j++) { - free(g_uadk_pool.pool[i].bds[j].src); - free(g_uadk_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); free(g_uadk_pool.pool[i].bds[j].mac); } free(g_uadk_pool.pool[i].bds); @@ -1173,8 +1310,11 @@ static void free_uadk_bd_pool(void) for (i = 0; i < g_thread_num; i++) { if (g_uadk_pool.pool[i].bds) { for (j = 0; j < MAX_POOL_LENTH; j++) { - free(g_uadk_pool.pool[i].bds[j].src); - free(g_uadk_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); + if (g_alg == DIGEST_TYPE) + free(g_uadk_pool.pool[i].bds[j].dst); + else + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); free(g_uadk_pool.pool[i].bds[j].mac); } } @@ -1655,7 +1795,7 @@ static void *sec_uadk_cipher_async(void *arg) creq.in_bytes = g_pktlen; creq.out_bytes = g_pktlen; creq.out_buf_bytes = g_pktlen; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.state = 0; creq.cb = cipher_async_cb; @@ -1787,7 +1927,7 @@ static void *sec_uadk_aead_async(void *arg) else areq.out_bytes = g_pktlen + 32; // aadsize + authsize = 32; - areq.data_fmt = 0; + areq.data_fmt = g_data_fmt; areq.state = 0; areq.cb = aead_async_cb; @@ -1895,7 +2035,7 @@ static void *sec_uadk_digest_async(void *arg) dreq.in_bytes = g_pktlen; dreq.out_bytes = pdata->d_outbytes; dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = 0; + dreq.data_fmt = g_data_fmt; dreq.state = 0; dreq.has_next = 0; dreq.cb = digest_async_cb; @@ -1997,7 +2137,7 @@ static void *sec_uadk_cipher_sync(void *arg) creq.in_bytes = g_pktlen; creq.out_bytes = g_pktlen; creq.out_buf_bytes = g_pktlen; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.state = 0; while(1) { @@ -2101,7 +2241,7 @@ static void *sec_uadk_aead_sync(void *arg) else areq.out_bytes = g_pktlen + 32; // aadsize + authsize = 32; - areq.data_fmt = 0; + areq.data_fmt = g_data_fmt; areq.state = 0; while(1) { @@ -2176,7 +2316,7 @@ static void *sec_uadk_digest_sync(void *arg) dreq.in_bytes = g_pktlen; dreq.out_bytes = pdata->d_outbytes; dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = 0; + dreq.data_fmt = g_data_fmt; dreq.state = 0; dreq.has_next = 0; @@ -2355,6 +2495,7 @@ int sec_uadk_benchmark(struct acc_option *options) g_alg = options->subtype; g_optype = options->optype; g_algtype = options->algtype; + g_data_fmt = options->data_fmt; if (g_alg == AEAD_TYPE) { g_maclen = get_aead_mac_len(g_algtype); diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 24737c5..f57c4f9 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -370,6 +370,15 @@ void segmentfault_handler(int sig) exit(1); } +void memset_buf(void *buf, size_t sz) +{ + char *ch = (char *)buf; + size_t i; + + for (i = 0; i < sz; i++) + ch[i] = 0; +} + /*-------------------------------------main code------------------------------------------------------*/ static void parse_alg_param(struct acc_option *option) { @@ -741,6 +750,7 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) {"init2", no_argument, 0, 17}, {"device", required_argument, 0, 18}, {"memory", required_argument, 0, 19}, + {"sgl", no_argument, 0, 20}, {0, 0, 0, 0} }; @@ -815,6 +825,9 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option 
*option) case 19: option->mem_type = strtol(optarg, NULL, 0); break; + case 20: + option->data_fmt = WD_SGL_BUF; + break; default: ACC_TST_PRT("invalid: bad input parameter!\n"); print_benchmark_help(); diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index 81ace1b..83fd7fa 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -37,6 +37,7 @@ #define SEND_USLEEP 100 #define SEC_2_USEC 1000000 #define HASH_ZISE 16 +#define SGL_ALIGNED_BYTES 64 #define SCHED_SINGLE "sched_single" #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -82,6 +83,7 @@ struct acc_option { u32 sched_type; int task_type; int mem_type; + u32 data_fmt; }; enum uadk_mem_mode { @@ -230,6 +232,7 @@ extern u32 get_recv_time(void); extern void cal_avg_latency(u32 count); extern int get_alg_name(int alg, char *alg_name); extern void segmentfault_handler(int sig); +extern void memset_buf(void *buf, size_t sz); int uadk_parse_dev_id(char *dev_name); int acc_cmd_parse(int argc, char *argv[], struct acc_option *option); diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index fc81c2b..092f710 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 */ #include <numa.h> +#include <math.h> #include "uadk_benchmark.h" #include "zip_uadk_benchmark.h" @@ -84,6 +85,7 @@ static unsigned int g_pktlen; static unsigned int g_prefetch; static unsigned int g_state; static unsigned int g_dev_id; +static unsigned int g_data_fmt; #ifndef ZLIB_FSE static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level) @@ -175,8 +177,15 @@ fd_error: static int load_file_data(const char *alg, u32 pkg_len, u32 optype) { struct zip_file_head *fhead = NULL; + struct wd_datalist *src_curr = NULL; + struct wd_datalist *dst_curr = NULL; char file_path[PATH_SIZE]; + size_t total_read = 0; + size_t total_len = 0; + size_t remaining = 0; ssize_t size = 0xff; + size_t copied = 0; + size_t len = 0; int i, j, fd; int ret; @@ -216,30 +225,63 @@ static int load_file_data(const char *alg, u32 pkg_len, u32 optype) // read data for one buffer one buffer from file line for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - memset(g_zip_pool.pool[0].bds[j].src, 0x0, - g_zip_pool.pool[0].bds[j].src_len); - if (size != 0) { // zero size buffer no need to read; - size = read(fd, g_zip_pool.pool[0].bds[j].src, - fhead->blk_sz[j]); - if (size < 0) { - ZIP_TST_PRT("Decompress read data error size: %lu!\n", size); - ret = -EINVAL; - goto read_err; - } else if (size == 0) { - ZIP_TST_PRT("Read file to the end!"); + if (g_data_fmt == 0) { + if (fhead->blk_sz[j] != 0) { + size = read(fd, g_zip_pool.pool[0].bds[j].src, fhead->blk_sz[j]); + if (size < 0) { + ZIP_TST_PRT("Decompress read data error size: %ld!\n", size); + ret = -EINVAL; + goto read_err; + } + g_zip_pool.pool[0].bds[j].src_len = size; + } else { + g_zip_pool.pool[0].bds[j].src_len = 0; } + } else { + src_curr = (struct wd_datalist *)g_zip_pool.pool[0].bds[j].src; + remaining = fhead->blk_sz[j]; + total_read = 0; + while (src_curr && remaining > 0) { + len = fmin(remaining, src_curr->len); + size = read(fd, src_curr->data, len); + if (size < 0) { + ZIP_TST_PRT("Decompress read data error at block %d!\n", j); + ret = -EINVAL; + goto read_err; + } + total_read += size; + remaining -= size; + src_curr = src_curr->next; + } + g_zip_pool.pool[0].bds[j].src_len = total_read; } - g_zip_pool.pool[0].bds[j].src_len = 
size; } for (i = 1; i < g_thread_num; i++) { for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - if (g_zip_pool.pool[0].bds[j].src_len) + if (g_zip_pool.pool[0].bds[j].src_len == 0) + continue; + + if (g_data_fmt == 0) { memcpy(g_zip_pool.pool[i].bds[j].src, g_zip_pool.pool[0].bds[j].src, g_zip_pool.pool[0].bds[j].src_len); - g_zip_pool.pool[i].bds[j].src_len = - g_zip_pool.pool[0].bds[j].src_len; + g_zip_pool.pool[i].bds[j].src_len = g_zip_pool.pool[0].bds[j].src_len; + } else { + src_curr = (struct wd_datalist *)g_zip_pool.pool[0].bds[j].src; + dst_curr = (struct wd_datalist *)g_zip_pool.pool[i].bds[j].src; + total_len = g_zip_pool.pool[0].bds[j].src_len; + copied = 0; + while (src_curr && dst_curr && copied < total_len) { + len = fmin(src_curr->len, dst_curr->len); + len = fmin(len, total_len - copied); + memcpy(dst_curr->data, src_curr->data, len); + copied += len; + src_curr = src_curr->next; + dst_curr = dst_curr->next; + } + g_zip_pool.pool[i].bds[j].src_len = total_len; + } } } @@ -575,8 +617,98 @@ static void uninit_ctx_config(void) wd_sched_rr_release(g_sched); } +/* + * Calculate SGL unit size. + */ +static inline size_t cal_unit_sz(size_t sz) +{ + return (sz + SGL_ALIGNED_BYTES - 1) & ~(SGL_ALIGNED_BYTES - 1); +} + +/* + * Create SGL or common memory buffer. + */ +static void *create_buf(int sgl, size_t sz, size_t unit_sz) +{ + struct wd_datalist *head, *p, *q; + int i, tail_sz, sgl_num; + void *buf; + + buf = malloc(sz); + if (!buf) { + ZIP_TST_PRT("Fail to allocate buffer %ld size!\n", sz); + return NULL; + } + + memset_buf(buf, sz); + + if (sgl == WD_FLAT_BUF) + return buf; + + if (sz == g_pktlen) { + get_rand_data(buf, sz * COMPRESSION_RATIO_FACTOR); + } else { + if (g_prefetch) + get_rand_data(buf, sz); + } + tail_sz = sz % unit_sz; + sgl_num = sz / unit_sz; /* the number with unit_sz bytes */ + + /* the additional slot is for tail_sz */ + head = calloc(sgl_num + 1, sizeof(struct wd_datalist)); + if (!head) { + ZIP_TST_PRT("Fail to allocate memory for SGL head!\n"); + goto out; + } + + q = NULL; + for (i = 0; i < sgl_num; i++) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = unit_sz; + if (q) + q->next = p; + q = p; + } + + if (tail_sz) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = tail_sz; + if (q) + q->next = p; + q = p; + } + + if (q) + q->next = NULL; + + return head; +out: + free(buf); + return NULL; +} + +static void free_buf(int sgl, void *buf) +{ + struct wd_datalist *p; + + if (!buf) + return; + if (sgl == WD_FLAT_BUF) { + free(buf); + return; + } + p = (struct wd_datalist *)buf; + /* free the whole data buffer of SGL */ + free(p->data); + /* free SGL headers */ + free(buf); +} + static int init_uadk_bd_pool(u32 optype) { + int unit_sz; u32 outsize; u32 insize; int i, j; @@ -605,37 +737,40 @@ static int init_uadk_bd_pool(u32 optype) goto malloc_error1; } for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - g_zip_pool.pool[i].bds[j].src = calloc(1, insize); + unit_sz = cal_unit_sz(insize); + g_zip_pool.pool[i].bds[j].src = create_buf(g_data_fmt, insize, unit_sz); if (!g_zip_pool.pool[i].bds[j].src) goto malloc_error2; g_zip_pool.pool[i].bds[j].src_len = insize; - g_zip_pool.pool[i].bds[j].dst = malloc(outsize); + unit_sz = cal_unit_sz(outsize); + g_zip_pool.pool[i].bds[j].dst = create_buf(g_data_fmt, outsize, unit_sz); if (!g_zip_pool.pool[i].bds[j].dst) goto malloc_error3; g_zip_pool.pool[i].bds[j].dst_len = outsize; - get_rand_data(g_zip_pool.pool[i].bds[j].src, - insize * COMPRESSION_RATIO_FACTOR); - if (g_prefetch) - 
get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); + if (g_data_fmt == WD_FLAT_BUF) { + get_rand_data(g_zip_pool.pool[i].bds[j].src, insize * COMPRESSION_RATIO_FACTOR); + if (g_prefetch) + get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); + } } } return 0; malloc_error3: - free(g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); malloc_error2: for (j--; j >= 0; j--) { - free(g_zip_pool.pool[i].bds[j].src); - free(g_zip_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].dst); } malloc_error1: for (i--; i >= 0; i--) { for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - free(g_zip_pool.pool[i].bds[j].src); - free(g_zip_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].dst); } free(g_zip_pool.pool[i].bds); g_zip_pool.pool[i].bds = NULL; @@ -654,8 +789,8 @@ static void free_uadk_bd_pool(void) for (i = 0; i < g_thread_num; i++) { if (g_zip_pool.pool[i].bds) { for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - free(g_zip_pool.pool[i].bds[j].src); - free(g_zip_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].dst); } } free(g_zip_pool.pool[i].bds); @@ -938,7 +1073,7 @@ static void *zip_uadk_blk_lz77_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.status = 0; ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP); @@ -1048,7 +1183,7 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.status = 0; ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP); @@ -1153,7 +1288,7 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = zip_lz77_async_cb; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.status = 0; while(1) { @@ -1242,7 +1377,7 @@ static void *zip_uadk_blk_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.priv = 0; creq.status = 0; @@ -1317,7 +1452,7 @@ static void *zip_uadk_stm_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.priv = 0; creq.status = 0; @@ -1397,7 +1532,7 @@ static void *zip_uadk_blk_async_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = zip_async_cb; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.priv = 0; creq.status = 0; @@ -1634,6 +1769,7 @@ int zip_uadk_benchmark(struct acc_option *options) g_pktlen = options->pktlen; g_ctxnum = options->ctxnums; g_prefetch = options->prefetch; + g_data_fmt = options->data_fmt; if (options->optype >= WD_DIR_MAX * 2) { ZIP_TST_PRT("ZIP optype error: %u\n", options->optype); -- 2.33.0
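[Editor's note] A minimal, self-contained sketch of the SGL scheme the benchmark patch above adds: create_buf() carves one flat allocation into unit_sz slices and chains them through wd_datalist nodes taken from a single calloc()'d array, so free_buf() only needs to free the first node's data pointer plus the node array. The struct below mirrors only the fields the diff relies on (data/len/next) and is illustrative rather than the uadk header; the helper names and the 64-byte alignment stand in for SGL_ALIGNED_BYTES and cal_unit_sz().

#include <stdio.h>
#include <stdlib.h>

struct wd_datalist {
	void *data;
	unsigned int len;
	struct wd_datalist *next;
};

/* round sz up to the next multiple of a power-of-two alignment */
static size_t unit_align(size_t sz, size_t align)
{
	return (sz + align - 1) & ~(align - 1); /* e.g. 100 -> 128 for align 64 */
}

static struct wd_datalist *sgl_from_flat(void *buf, size_t sz, size_t unit_sz)
{
	size_t sgl_num = sz / unit_sz;
	size_t tail_sz = sz % unit_sz;
	struct wd_datalist *head, *q = NULL;
	size_t i;

	/* one extra node slot is reserved for a possible tail slice */
	head = calloc(sgl_num + (tail_sz ? 1 : 0), sizeof(*head));
	if (!head)
		return NULL;

	for (i = 0; i < sgl_num; i++) {
		head[i].data = (char *)buf + i * unit_sz;
		head[i].len = unit_sz;
		if (q)
			q->next = &head[i];
		q = &head[i];
	}
	if (tail_sz) {
		head[i].data = (char *)buf + i * unit_sz;
		head[i].len = tail_sz;
		if (q)
			q->next = &head[i];
	}
	return head; /* calloc() already left the last node's next NULL */
}

int main(void)
{
	size_t sz = 1000, unit = unit_align(64, 64); /* 15 slices + 40-byte tail */
	void *buf = calloc(1, sz);
	struct wd_datalist *p, *sgl;
	size_t total = 0;

	if (!buf)
		return 1;
	sgl = sgl_from_flat(buf, sz, unit);
	if (!sgl) {
		free(buf);
		return 1;
	}
	for (p = sgl; p; p = p->next)
		total += p->len;
	printf("chained %zu bytes in SGL\n", total); /* prints 1000 */

	/* as in free_buf(): node[0].data is the whole flat data buffer */
	free(sgl->data);
	free(sgl);
	return 0;
}

Using one allocation for the data and one for the node array keeps teardown at two free() calls regardless of chain length, which is why free_buf() in the patch can stay so small.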
From: Wenkai Lin <linwenkai6@hisilicon.com> 1. Remove the unused lock from the hashjoin ctx. 2. Avoid a numeric value overflow in the index row number check. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_join_gather.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drv/hisi_dae_join_gather.c b/drv/hisi_dae_join_gather.c index 9b9c07d..63c7670 100644 --- a/drv/hisi_dae_join_gather.c +++ b/drv/hisi_dae_join_gather.c @@ -86,7 +86,6 @@ struct join_gather_ctx { struct join_gather_col_data cols_data; struct hash_table_data table_data; struct hash_table_data rehash_table; - pthread_spinlock_t lock; __u32 hash_table_row_size; __u32 batch_row_size[DAE_MAX_TABLE_NUM]; }; @@ -365,7 +364,8 @@ static int check_join_gather_param(struct wd_join_gather_msg *msg) { struct wd_probe_out_info *output; struct wd_gather_req *greq; - __u64 row_num, size; + __u32 row_num; + __u64 size; if (!msg) { WD_ERR("invalid: input join gather msg is NULL!\n"); @@ -398,10 +398,10 @@ static int check_join_gather_param(struct wd_join_gather_msg *msg) return -WD_EINVAL; } if (msg->index_type == WD_BATCH_ADDR_INDEX) { - row_num = msg->req.output_row_num << DAE_ADDR_INDEX_SHIFT; - if (output->build_index.row_num < row_num) { - WD_ERR("build index row number is less than: %llu\n", - row_num); + row_num = output->build_index.row_num >> DAE_ADDR_INDEX_SHIFT; + if (row_num < msg->req.output_row_num) { + WD_ERR("build index row number(%u) is less than needed\n", + output->build_index.row_num); return -WD_EINVAL; } } @@ -438,10 +438,10 @@ static int check_join_gather_param(struct wd_join_gather_msg *msg) return -WD_EINVAL; } } else { - row_num = msg->req.output_row_num << DAE_ADDR_INDEX_SHIFT; - if (greq->index.row_num < row_num) { - WD_ERR("build index row number is less than: %llu\n", - row_num); + row_num = greq->index.row_num >> DAE_ADDR_INDEX_SHIFT; + if (row_num < msg->req.output_row_num) { + WD_ERR("build index row number(%u) is less than needed\n", + greq->index.row_num); return -WD_EINVAL; } } @@ -898,7 +898,6 @@ static void join_gather_sess_priv_uninit(struct wd_alg_driver *drv, void *priv) return; } - pthread_spin_destroy(&ctx->lock); free(ctx); } @@ -930,10 +929,6 @@ static int join_gather_sess_priv_init(struct wd_alg_driver *drv, if (ret) goto free_ctx; - ret = pthread_spin_init(&ctx->lock, PTHREAD_PROCESS_SHARED); - if (ret) - goto free_ctx; - *priv = ctx; return WD_SUCCESS; -- 2.33.0
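[Editor's note] A sketch of the overflow-safe comparison the hunks above switch to. Left-shifting the 32-bit requested row count is evaluated in 32 bits and can wrap before the compare, while right-shifting the available count can only shrink it; since floor(avail / 2^S) >= req is exactly equivalent to avail >= req * 2^S for unsigned integers, no precision is lost. SHIFT and the field widths are assumed stand-ins (DAE_ADDR_INDEX_SHIFT's value is not visible in the diff).

#include <stdint.h>
#include <stdio.h>

#define SHIFT 3

/* old pattern: req << SHIFT is computed modulo 2^32 and wraps for large req */
static int check_wraps(uint32_t avail, uint32_t req)
{
	return avail >= (req << SHIFT);
}

/* new pattern: avail >> SHIFT can only shrink, never wrap */
static int check_safe(uint32_t avail, uint32_t req)
{
	return (avail >> SHIFT) >= req;
}

int main(void)
{
	uint32_t avail = 8, req = UINT32_C(1) << 29; /* req << 3 wraps to 0 */

	printf("wrapping check: %d (wrongly passes)\n", check_wraps(avail, req));
	printf("safe check:     %d (correctly fails)\n", check_safe(avail, req));
	return 0;
}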
From: Wenkai Lin <linwenkai6@hisilicon.com> Extract a common function fill_hashagg_data_info. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae.c | 56 +++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/drv/hisi_dae.c b/drv/hisi_dae.c index 4f4d13c..49387aa 100644 --- a/drv/hisi_dae.c +++ b/drv/hisi_dae.c @@ -26,7 +26,7 @@ #define DAE_VCHAR_OFFSET_SIZE 2 #define DAE_COL_BIT_NUM 4 #define DAE_AGG_START_COL 16 -#define DAE_HASHAGG_MAX_ROW_NUN 50000 +#define DAE_HASHAGG_MAX_ROW_NUM 50000 /* align size */ #define DAE_CHAR_ALIGN_SIZE 4 @@ -294,23 +294,8 @@ static void fill_hashagg_merge_key_data(struct dae_sqe *sqe, struct dae_ext_sqe } } -static void fill_hashagg_normal_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, - struct hashagg_col_data *cols_data, __u32 agg_cols_num) -{ - struct hw_agg_data *agg_data = cols_data->input_data; - __u32 i; - - for (i = 0; i < agg_cols_num; i++) { - sqe->agg_data_type[i] = agg_data[i].hw_type; - sqe->agg_data_type[i] |= agg_data[i].sum_outtype << DAE_COL_BIT_NUM; - ext_sqe->agg_data_info[i] = agg_data[i].data_info; - } - - sqe->agg_col_bitmap = GENMASK(agg_cols_num + DAE_AGG_START_COL - 1, DAE_AGG_START_COL); -} - -static void fill_hashagg_rehash_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, - struct hw_agg_data *agg_data, __u32 agg_cols_num) +static void fill_hashagg_data_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, + struct hw_agg_data *agg_data, __u32 agg_cols_num) { __u32 i; @@ -344,14 +329,14 @@ static void fill_hashagg_input_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext hw_agg_addr = &addr_list->input_addr[DAE_AGG_START_COL]; usr_agg_addr = msg->req.agg_cols; agg_col_num = msg->agg_cols_num; - fill_hashagg_normal_info(sqe, ext_sqe, cols_data, agg_col_num); + fill_hashagg_data_info(sqe, ext_sqe, agg_data, agg_col_num); break; case WD_AGG_REHASH_INPUT: agg_data = cols_data->output_data; hw_agg_addr = &addr_list->input_addr[DAE_AGG_START_COL]; usr_agg_addr = msg->req.agg_cols; agg_col_num = cols_data->output_num; - fill_hashagg_rehash_info(sqe, ext_sqe, agg_data, agg_col_num); + fill_hashagg_data_info(sqe, ext_sqe, agg_data, agg_col_num); break; case WD_AGG_STREAM_OUTPUT: case WD_AGG_REHASH_OUTPUT: @@ -359,7 +344,7 @@ static void fill_hashagg_input_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext hw_agg_addr = &addr_list->output_addr[DAE_AGG_START_COL]; usr_agg_addr = msg->req.out_agg_cols; agg_col_num = cols_data->output_num; - fill_hashagg_normal_info(sqe, ext_sqe, cols_data, cols_data->input_num); + fill_hashagg_data_info(sqe, ext_sqe, cols_data->input_data, cols_data->input_num); break; } @@ -385,7 +370,7 @@ static void fill_hashagg_merge_input_data(struct dae_sqe *sqe, struct dae_ext_sq struct hashagg_ctx *agg_ctx = msg->priv; struct hashagg_col_data *cols_data = &agg_ctx->cols_data; - fill_hashagg_rehash_info(sqe, ext_sqe, cols_data->output_data, msg->agg_cols_num); + fill_hashagg_data_info(sqe, ext_sqe, cols_data->output_data, msg->agg_cols_num); } static void fill_hashagg_ext_addr(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, @@ -422,26 +407,15 @@ static void fill_hashagg_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, static int check_hashagg_param(struct wd_agg_msg *msg) { - struct hashagg_col_data *cols_data; - struct hashagg_ctx *agg_ctx; - if (!msg) { WD_ERR("invalid: input hashagg msg is NULL!\n"); return -WD_EINVAL; } - agg_ctx = msg->priv; - cols_data = 
&agg_ctx->cols_data; - if (cols_data->output_num > DAE_MAX_OUTPUT_COLS) { - WD_ERR("invalid: input hashagg output num %u is more than %d!\n", - cols_data->output_num, DAE_MAX_OUTPUT_COLS); - return -WD_EINVAL; - } - if ((msg->pos == WD_AGG_STREAM_INPUT || msg->pos == WD_AGG_REHASH_INPUT) && - msg->row_count > DAE_HASHAGG_MAX_ROW_NUN) { + msg->row_count > DAE_HASHAGG_MAX_ROW_NUM) { WD_ERR("invalid: input hashagg row count %u is more than %d!\n", - msg->row_count, DAE_HASHAGG_MAX_ROW_NUN); + msg->row_count, DAE_HASHAGG_MAX_ROW_NUM); return -WD_EINVAL; } @@ -1005,6 +979,7 @@ static int transfer_input_col_info(struct wd_agg_col_info *agg_cols, struct hw_agg_data *user_output_data, __u32 cols_num, __u32 *output_num) { + __u32 tmp = *output_num; __u32 i, j, k = 0; int ret; @@ -1013,7 +988,15 @@ static int transfer_input_col_info(struct wd_agg_col_info *agg_cols, WD_ERR("invalid: col alg num(%u) more than 2!\n", agg_cols[i].col_alg_num); return -WD_EINVAL; } + tmp += agg_cols[i].col_alg_num; + } + if (tmp > DAE_MAX_OUTPUT_COLS) { + WD_ERR("invalid: output col num is more than %d!\n", DAE_MAX_OUTPUT_COLS); + return -WD_EINVAL; + } + + for (i = 0; i < cols_num; i++) { for (j = 0; j < agg_cols[i].col_alg_num; j++) { ret = hashagg_check_input_data(&agg_cols[i], &user_input_data[i], &user_output_data[k], j); @@ -1137,6 +1120,9 @@ static int transfer_data_to_hw_type(struct hashagg_col_data *cols_data, struct wd_agg_col_info *agg_cols = setup->agg_cols_info; int ret; + if (setup->is_count_all) + cols_data->output_num++; + ret = transfer_input_col_info(agg_cols, user_input_data, user_output_data, setup->agg_cols_num, &cols_data->output_num); if (ret) -- 2.33.0
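[Editor's note] A minimal sketch of the validate-before-fill pattern the transfer_input_col_info() hunk above moves to: a first pass sums the per-column output slots and rejects the request before the second pass commits anything, so a too-large configuration can no longer leave partially written state. MAX_OUT, the struct, and the function names are illustrative stand-ins for DAE_MAX_OUTPUT_COLS and the driver's column bookkeeping; the pre-seeded count models the is_count_all slot reserved in transfer_data_to_hw_type().

#include <stdio.h>

#define MAX_OUT 16

struct col_info {
	unsigned int col_alg_num; /* output slots produced by this input column */
};

static int transfer_cols(const struct col_info *cols, unsigned int num,
			 unsigned int *out_num)
{
	unsigned int total = *out_num; /* may already include a count-all slot */
	unsigned int i;

	/* pass 1: bound-check the grand total before touching any state */
	for (i = 0; i < num; i++) {
		if (cols[i].col_alg_num > 2)
			return -1;
		total += cols[i].col_alg_num;
	}
	if (total > MAX_OUT)
		return -1;

	/* pass 2: now safe to commit the per-column output slots */
	for (i = 0; i < num; i++)
		*out_num += cols[i].col_alg_num;
	return 0;
}

int main(void)
{
	struct col_info cols[] = { {2}, {1}, {2} };
	unsigned int out = 1; /* models is_count_all reserving one column */

	if (!transfer_cols(cols, 3, &out))
		printf("output columns: %u\n", out); /* prints 6 */
	return 0;
}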