From: Yang Shen <shenyang39@huawei.com>
In Warpdrive sync mode, the driver writes a tag into the SQE to mark
the request's owner and checks that tag when the request completes. A
lock was added to protect the receive path and keep this check safe.
However, pthread_spin_lock is an unfair CAS-based lock: under
contention, one thread can keep reacquiring it even though the
completed request belongs to another thread, leaving the owner
spinning until it times out.

Therefore, introduce a fair ticket lock to replace pthread_spin_lock,
so that contending threads acquire the lock in first-come,
first-served order.
Signed-off-by: Yang Shen <shenyang39@huawei.com>
---
v1/drv/hisi_qm_udrv.c | 39 ++++++++++++---------------------------
v1/drv/hisi_qm_udrv.h | 4 ++--
v1/wd_util.c | 31 +++++++++++++++++++++++++++++++
v1/wd_util.h | 8 ++++++++
4 files changed, 53 insertions(+), 29 deletions(-)
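As background for review: below is a minimal, standalone sketch of the
ticket-lock scheme this patch adds (this note sits above the first diff
header, so git am ignores it). It is written against the GCC __atomic
builtins already used in wd_util.c; the demo_* names are illustrative
only and do not exist in the patch.

  #include <stdint.h>

  struct demo_ticket_lock {
  	volatile uint32_t ticket;	/* next ticket to hand out */
  	volatile uint32_t serving;	/* ticket currently holding the lock */
  };

  static void demo_lock(struct demo_ticket_lock *l)
  {
  	/* Atomically take the next ticket in line. */
  	uint32_t mine = __atomic_fetch_add(&l->ticket, 1, __ATOMIC_ACQ_REL);

  	/* Spin until the lock is serving our ticket. */
  	while (__atomic_load_n(&l->serving, __ATOMIC_ACQUIRE) != mine)
  		;
  }

  static void demo_unlock(struct demo_ticket_lock *l)
  {
  	/* Pass the lock to the next ticket in line. */
  	__atomic_fetch_add(&l->serving, 1, __ATOMIC_ACQ_REL);
  }

Because tickets are served strictly in arrival order, a thread whose
completion is pending cannot keep losing the CAS race to other threads,
which is the starvation mode the commit message describes.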
diff --git a/v1/drv/hisi_qm_udrv.c b/v1/drv/hisi_qm_udrv.c
index c711a497..7b0183bc 100644
--- a/v1/drv/hisi_qm_udrv.c
+++ b/v1/drv/hisi_qm_udrv.c
@@ -535,24 +535,11 @@ static int qm_set_queue_info(struct wd_queue *q)
goto err_with_regions;
}
- ret = pthread_spin_init(&info->sd_lock, PTHREAD_PROCESS_PRIVATE);
- if (ret) {
- WD_ERR("failed to init qinfo sd_lock!\n");
- goto free_cache;
- }
-
- ret = pthread_spin_init(&info->rc_lock, PTHREAD_PROCESS_PRIVATE);
- if (ret) {
- WD_ERR("failed to init qinfo rc_lock!\n");
- goto uninit_lock;
- }
+ wd_fair_init(&info->sd_lock);
+ wd_fair_init(&info->rc_lock);
return 0;
-uninit_lock:
- pthread_spin_destroy(&info->sd_lock);
-free_cache:
- free(info->req_cache);
err_with_regions:
qm_unset_queue_regions(q);
return ret;
@@ -593,8 +580,6 @@ void qm_uninit_queue(struct wd_queue *q)
struct q_info *qinfo = q->qinfo;
struct qm_queue_info *info = qinfo->priv;
- pthread_spin_destroy(&info->rc_lock);
- pthread_spin_destroy(&info->sd_lock);
free(info->req_cache);
qm_unset_queue_regions(q);
free(qinfo->priv);
@@ -624,10 +609,10 @@ int qm_send(struct wd_queue *q, void **req, __u32 num)
int ret;
__u32 i;
- pthread_spin_lock(&info->sd_lock);
+ wd_fair_lock(&info->sd_lock);
if (unlikely((__u32)__atomic_load_n(&info->used, __ATOMIC_RELAXED) >
info->sq_depth - num - 1)) {
- pthread_spin_unlock(&info->sd_lock);
+ wd_fair_unlock(&info->sd_lock);
WD_ERR("queue is full!\n");
return -WD_EBUSY;
}
@@ -636,7 +621,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num)
ret = info->sqe_fill[qinfo->atype](req[i], qinfo->priv,
info->sq_tail_index);
if (unlikely(ret != WD_SUCCESS)) {
- pthread_spin_unlock(&info->sd_lock);
+ wd_fair_unlock(&info->sd_lock);
WD_ERR("sqe fill error, ret %d!\n", ret);
return -WD_EINVAL;
}
@@ -648,7 +633,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num)
}
ret = qm_tx_update(info, num);
- pthread_spin_unlock(&info->sd_lock);
+ wd_fair_unlock(&info->sd_lock);
return ret;
}
@@ -681,9 +666,9 @@ static int check_ds_rx_base(struct qm_queue_info *info,
return 0;
if (before) {
- pthread_spin_lock(&info->rc_lock);
+ wd_fair_lock(&info->rc_lock);
qm_rx_from_cache(info, resp, num);
- pthread_spin_unlock(&info->rc_lock);
+ wd_fair_unlock(&info->rc_lock);
WD_ERR("wd queue hw error happened before qm receive!\n");
} else {
WD_ERR("wd queue hw error happened after qm receive!\n");
@@ -724,7 +709,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num)
if (unlikely(ret))
return ret;
- pthread_spin_lock(&info->rc_lock);
+ wd_fair_lock(&info->rc_lock);
for (i = 0; i < num; i++) {
cqe = info->cq_base + info->cq_head_index * sizeof(struct cqe);
if (info->cqc_phase != CQE_PHASE(cqe))
@@ -733,7 +718,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num)
mb(); /* make sure the data is all in memory before read */
sq_head = CQE_SQ_HEAD_INDEX(cqe);
if (unlikely(sq_head >= info->sq_depth)) {
- pthread_spin_unlock(&info->rc_lock);
+ wd_fair_unlock(&info->rc_lock);
WD_ERR("CQE_SQ_HEAD_INDEX(%u) error\n", sq_head);
return -WD_EIO;
}
@@ -745,7 +730,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num)
if (!ret) {
break;
} else if (ret < 0) {
- pthread_spin_unlock(&info->rc_lock);
+ wd_fair_unlock(&info->rc_lock);
WD_ERR("recv sqe error %u\n", sq_head);
return ret;
}
@@ -766,7 +751,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num)
ret = i;
}
- pthread_spin_unlock(&info->rc_lock);
+ wd_fair_unlock(&info->rc_lock);
return ret;
}
diff --git a/v1/drv/hisi_qm_udrv.h b/v1/drv/hisi_qm_udrv.h
index 06ac66a0..d71f9de8 100644
--- a/v1/drv/hisi_qm_udrv.h
+++ b/v1/drv/hisi_qm_udrv.h
@@ -166,8 +166,8 @@ struct qm_queue_info {
qm_sqe_parse sqe_parse[WCRYPTO_MAX_ALG];
hisi_qm_sqe_fill_priv sqe_fill_priv;
hisi_qm_sqe_parse_priv sqe_parse_priv;
- pthread_spinlock_t sd_lock;
- pthread_spinlock_t rc_lock;
+ struct wd_fair_lock sd_lock;
+ struct wd_fair_lock rc_lock;
struct wd_queue *q;
int (*sgl_info)(struct hw_sgl_info *info);
int (*sgl_init)(void *pool, struct wd_sgl *sgl);
diff --git a/v1/wd_util.c b/v1/wd_util.c
index 29f65792..3dac2d74 100644
--- a/v1/wd_util.c
+++ b/v1/wd_util.c
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <stdatomic.h>
#include <stdio.h>
#include <string.h>
#include <dirent.h>
@@ -21,6 +22,7 @@
#include "v1/wd_util.h"
#define BYTE_TO_BIT 8
+#define LOCK_TRY_CNT (0x800000000U)
void wd_spinlock(struct wd_lock *lock)
{
@@ -43,6 +45,35 @@ void wd_unspinlock(struct wd_lock *lock)
__atomic_store_n(&lock->lock, 0, __ATOMIC_RELEASE);
}
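+/* Reset the ticket and serving counters so the lock starts free. */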
+void wd_fair_init(struct wd_fair_lock *lock)
+{
+ atomic_exchange_explicit(&lock->ticket, 0, memory_order_acq_rel);
+ atomic_exchange_explicit(&lock->serving, 0, memory_order_acq_rel);
+}
+
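+/* Take the next ticket and spin until "serving" reaches it (FIFO order). */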
+void wd_fair_lock(struct wd_fair_lock *lock)
+{
+ __u32 my_ticket = atomic_fetch_add_explicit(&lock->ticket, 1,
+ memory_order_acq_rel);
+ __u32 val = atomic_load_explicit(&lock->serving, memory_order_acquire);
+ __u64 cnt = 0;
+
+ if (val == my_ticket)
+ return;
+
+ do {
+ if (++cnt == LOCK_TRY_CNT)
+ WD_ERR("failed to get lock with %lu times\n", LOCK_TRY_CNT);
+
+ val = atomic_load_explicit(&lock->serving, memory_order_acquire);
+ } while (val != my_ticket);
+}
+
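+/* Hand the lock to the next waiting ticket holder. */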
+void wd_fair_unlock(struct wd_fair_lock *lock)
+{
+ atomic_fetch_add_explicit(&lock->serving, 1, memory_order_acq_rel);
+}
+
void *drv_iova_map(struct wd_queue *q, void *va, size_t sz)
{
struct q_info *qinfo = q->qinfo;
diff --git a/v1/wd_util.h b/v1/wd_util.h
index 21137d2d..70f9ed92 100644
--- a/v1/wd_util.h
+++ b/v1/wd_util.h
@@ -111,6 +111,11 @@ struct wd_lock {
__u8 lock;
};
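+/* Ticket lock: "ticket" hands out turns, "serving" grants them in
+ * FIFO order, so contenders acquire the lock first come, first served. */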
+struct wd_fair_lock {
+ volatile __u32 ticket;
+ volatile __u32 serving;
+};
+
struct wd_ss_region {
void *va;
unsigned long long pa;
@@ -392,6 +397,9 @@ static inline uint32_t wd_reg_read(void *reg_addr)
void wd_spinlock(struct wd_lock *lock);
void wd_unspinlock(struct wd_lock *lock);
+void wd_fair_init(struct wd_fair_lock *lock);
+void wd_fair_lock(struct wd_fair_lock *lock);
+void wd_fair_unlock(struct wd_fair_lock *lock);
void *wd_drv_mmap_qfr(struct wd_queue *q, enum uacce_qfrt qfrt, size_t size);
void wd_drv_unmmap_qfr(struct wd_queue *q, void *addr,
enum uacce_qfrt qfrt, size_t size);
--
2.33.0