From: Yang Shen <shenyang39@huawei.com>
In Warpdrive sync mode, the driver writes a tag in the SQE to mark the owner and checks this tag when a request is completed.
To ensure correct ownership checking, a lock was added to protect the receive path. However, pthread_spin_lock is a CAS-based (unfair) lock, so a single thread may keep re-acquiring it even though the completed request belongs to another thread; that other thread can then be starved until its request times out.
Therefore, we introduce a fair (ticket) lock to replace pthread_spin_lock. This way, waiting threads acquire the lock in FIFO order, so every thread gets its turn.
Signed-off-by: Yang Shen <shenyang39@huawei.com> --- v1/drv/hisi_qm_udrv.c | 39 ++++++++++++--------------------------- v1/drv/hisi_qm_udrv.h | 4 ++-- v1/wd_util.c | 31 +++++++++++++++++++++++++++++++ v1/wd_util.h | 8 ++++++++ 4 files changed, 53 insertions(+), 29 deletions(-)
diff --git a/v1/drv/hisi_qm_udrv.c b/v1/drv/hisi_qm_udrv.c index c711a497..7b0183bc 100644 --- a/v1/drv/hisi_qm_udrv.c +++ b/v1/drv/hisi_qm_udrv.c @@ -535,24 +535,11 @@ static int qm_set_queue_info(struct wd_queue *q) goto err_with_regions; }
- ret = pthread_spin_init(&info->sd_lock, PTHREAD_PROCESS_PRIVATE); - if (ret) { - WD_ERR("failed to init qinfo sd_lock!\n"); - goto free_cache; - } - - ret = pthread_spin_init(&info->rc_lock, PTHREAD_PROCESS_PRIVATE); - if (ret) { - WD_ERR("failed to init qinfo rc_lock!\n"); - goto uninit_lock; - } + wd_fair_init(&info->sd_lock); + wd_fair_init(&info->rc_lock);
return 0;
-uninit_lock: - pthread_spin_destroy(&info->sd_lock); -free_cache: - free(info->req_cache); err_with_regions: qm_unset_queue_regions(q); return ret; @@ -593,8 +580,6 @@ void qm_uninit_queue(struct wd_queue *q) struct q_info *qinfo = q->qinfo; struct qm_queue_info *info = qinfo->priv;
- pthread_spin_destroy(&info->rc_lock); - pthread_spin_destroy(&info->sd_lock); free(info->req_cache); qm_unset_queue_regions(q); free(qinfo->priv); @@ -624,10 +609,10 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) int ret; __u32 i;
- pthread_spin_lock(&info->sd_lock); + wd_fair_lock(&info->sd_lock); if (unlikely((__u32)__atomic_load_n(&info->used, __ATOMIC_RELAXED) > info->sq_depth - num - 1)) { - pthread_spin_unlock(&info->sd_lock); + wd_fair_unlock(&info->sd_lock); WD_ERR("queue is full!\n"); return -WD_EBUSY; } @@ -636,7 +621,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) ret = info->sqe_fill[qinfo->atype](req[i], qinfo->priv, info->sq_tail_index); if (unlikely(ret != WD_SUCCESS)) { - pthread_spin_unlock(&info->sd_lock); + wd_fair_unlock(&info->sd_lock); WD_ERR("sqe fill error, ret %d!\n", ret); return -WD_EINVAL; } @@ -648,7 +633,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) }
ret = qm_tx_update(info, num); - pthread_spin_unlock(&info->sd_lock); + wd_fair_unlock(&info->sd_lock);
return ret; } @@ -681,9 +666,9 @@ static int check_ds_rx_base(struct qm_queue_info *info, return 0;
if (before) { - pthread_spin_lock(&info->rc_lock); + wd_fair_lock(&info->rc_lock); qm_rx_from_cache(info, resp, num); - pthread_spin_unlock(&info->rc_lock); + wd_fair_unlock(&info->rc_lock); WD_ERR("wd queue hw error happened before qm receive!\n"); } else { WD_ERR("wd queue hw error happened after qm receive!\n"); @@ -724,7 +709,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) if (unlikely(ret)) return ret;
- pthread_spin_lock(&info->rc_lock); + wd_fair_lock(&info->rc_lock); for (i = 0; i < num; i++) { cqe = info->cq_base + info->cq_head_index * sizeof(struct cqe); if (info->cqc_phase != CQE_PHASE(cqe)) @@ -733,7 +718,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) mb(); /* make sure the data is all in memory before read */ sq_head = CQE_SQ_HEAD_INDEX(cqe); if (unlikely(sq_head >= info->sq_depth)) { - pthread_spin_unlock(&info->rc_lock); + wd_fair_unlock(&info->rc_lock); WD_ERR("CQE_SQ_HEAD_INDEX(%u) error\n", sq_head); return -WD_EIO; } @@ -745,7 +730,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) if (!ret) { break; } else if (ret < 0) { - pthread_spin_unlock(&info->rc_lock); + wd_fair_unlock(&info->rc_lock); WD_ERR("recv sqe error %u\n", sq_head); return ret; } @@ -766,7 +751,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) ret = i; }
- pthread_spin_unlock(&info->rc_lock); + wd_fair_unlock(&info->rc_lock);
return ret; } diff --git a/v1/drv/hisi_qm_udrv.h b/v1/drv/hisi_qm_udrv.h index 06ac66a0..d71f9de8 100644 --- a/v1/drv/hisi_qm_udrv.h +++ b/v1/drv/hisi_qm_udrv.h @@ -166,8 +166,8 @@ struct qm_queue_info { qm_sqe_parse sqe_parse[WCRYPTO_MAX_ALG]; hisi_qm_sqe_fill_priv sqe_fill_priv; hisi_qm_sqe_parse_priv sqe_parse_priv; - pthread_spinlock_t sd_lock; - pthread_spinlock_t rc_lock; + struct wd_fair_lock sd_lock; + struct wd_fair_lock rc_lock; struct wd_queue *q; int (*sgl_info)(struct hw_sgl_info *info); int (*sgl_init)(void *pool, struct wd_sgl *sgl); diff --git a/v1/wd_util.c b/v1/wd_util.c index 29f65792..3dac2d74 100644 --- a/v1/wd_util.c +++ b/v1/wd_util.c @@ -14,6 +14,7 @@ * limitations under the License. */
+#include <stdatomic.h> #include <stdio.h> #include <string.h> #include <dirent.h> @@ -21,6 +22,7 @@ #include "v1/wd_util.h"
#define BYTE_TO_BIT 8 +#define LOCK_TRY_CNT (0x800000000U)
void wd_spinlock(struct wd_lock *lock) { @@ -43,6 +45,35 @@ void wd_unspinlock(struct wd_lock *lock) __atomic_store_n(&lock->lock, 0, __ATOMIC_RELEASE); }
+void wd_fair_init(struct wd_fair_lock *lock) +{ + atomic_exchange_explicit(&lock->ticket, 0, memory_order_acq_rel); + atomic_exchange_explicit(&lock->serving, 0, memory_order_acq_rel); +} + +void wd_fair_lock(struct wd_fair_lock *lock) +{ + __u32 my_ticket = atomic_fetch_add_explicit(&lock->ticket, 1, + memory_order_acq_rel); + __u32 val = atomic_load_explicit(&lock->serving, memory_order_acquire); + __u64 cnt = 0; + + if (val == my_ticket) + return; + + do { + if (++cnt == LOCK_TRY_CNT) + WD_ERR("failed to get lock with %lu times\n", LOCK_TRY_CNT); + + val = atomic_load_explicit(&lock->serving, memory_order_acquire); + } while (val != my_ticket); +} + +void wd_fair_unlock(struct wd_fair_lock *lock) +{ + atomic_fetch_add_explicit(&lock->serving, 1, memory_order_acq_rel); +} + void *drv_iova_map(struct wd_queue *q, void *va, size_t sz) { struct q_info *qinfo = q->qinfo; diff --git a/v1/wd_util.h b/v1/wd_util.h index 21137d2d..70f9ed92 100644 --- a/v1/wd_util.h +++ b/v1/wd_util.h @@ -111,6 +111,11 @@ struct wd_lock { __u8 lock; };
+struct wd_fair_lock { + volatile __u32 ticket; + volatile __u32 serving; +}; + struct wd_ss_region { void *va; unsigned long long pa; @@ -392,6 +397,9 @@ static inline uint32_t wd_reg_read(void *reg_addr)
void wd_spinlock(struct wd_lock *lock); void wd_unspinlock(struct wd_lock *lock); +void wd_fair_init(struct wd_fair_lock *lock); +void wd_fair_lock(struct wd_fair_lock *lock); +void wd_fair_unlock(struct wd_fair_lock *lock); void *wd_drv_mmap_qfr(struct wd_queue *q, enum uacce_qfrt qfrt, size_t size); void wd_drv_unmmap_qfr(struct wd_queue *q, void *addr, enum uacce_qfrt qfrt, size_t size);