From: Zhen Lei <thunder.leizhen@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8Z6DC
CVE: NA
-------------------------------------------------------------------------
When a core exclusively owns an ECMDQ, there is no competition with other cores to consider during command insertion. We can therefore drop the multi-core contention handling from arm_smmu_cmdq_issue_cmdlist() and provide a more efficient ECMDQ-specific function, arm_smmu_ecmdq_issue_cmdlist().
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 91 +++++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +
 2 files changed, 94 insertions(+)
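Note for reviewers (not part of the commit message): the space-allocation loop in step 1 of the new function relies on the wrap-bit prod/cons encoding shared by queue_has_space() and queue_inc_prod_n(). The standalone sketch below models that encoding for a ring with a single producer; it is illustrative only, and all names in it (struct ring, ring_has_space(), ring_inc_prod()) are hypothetical.

/*
 * Illustrative sketch only -- not part of the patch. Models the
 * wrap-bit prod/cons scheme used by the SMMU ll_queue helpers,
 * for a ring with a single producer.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SHIFT	4u			/* 16 slots, like llq.max_n_shift */
#define NENTS	(1u << SHIFT)
#define IDX(p)	((p) & (NENTS - 1))	/* Q_IDX() analogue */
#define WRP(p)	((p) & NENTS)		/* Q_WRP() analogue: one wrap bit */

struct ring {
	uint32_t prod;			/* advanced by the producer */
	uint32_t cons;			/* advanced by the consumer */
};

/* Full and empty both have IDX(prod) == IDX(cons); the wrap bit differs. */
static bool ring_has_space(const struct ring *r, uint32_t n)
{
	uint32_t space;

	if (WRP(r->prod) == WRP(r->cons))
		space = NENTS - (IDX(r->prod) - IDX(r->cons));
	else
		space = IDX(r->cons) - IDX(r->prod);

	return space >= n;
}

/* The carry out of the index bits flips the wrap bit automatically. */
static uint32_t ring_inc_prod(uint32_t prod, uint32_t n)
{
	uint32_t next = (WRP(prod) | IDX(prod)) + n;

	return WRP(next) | IDX(next);
}

int main(void)
{
	struct ring r = { .prod = 14, .cons = 2 };

	printf("space for 4: %d\n", ring_has_space(&r, 4));	/* 1: 4 slots free */
	r.prod = ring_inc_prod(r.prod, 4);			/* index wraps */
	printf("prod=0x%x idx=%u wrp=%u\n", r.prod, IDX(r.prod), !!WRP(r.prod));
	printf("space for 4: %d\n", ring_has_space(&r, 4));	/* 0: queue full */
	return 0;
}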
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 0f554a33673d..40e425fa486f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -840,6 +840,89 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 	}
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+/*
+ * This function is used when the current core exclusively occupies an
+ * ECMDQ. It is a reduced version of arm_smmu_cmdq_issue_cmdlist() that
+ * eliminates the unnecessary inter-core contention handling.
+ */
+static int arm_smmu_ecmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+					struct arm_smmu_cmdq *cmdq,
+					u64 *cmds, int n, bool sync)
+{
+	u32 prod;
+	unsigned long flags;
+	struct arm_smmu_ll_queue llq = {
+		.max_n_shift = cmdq->q.llq.max_n_shift,
+	}, head;
+	int ret = 0;
+
+	/* 1. Allocate some space in the queue */
+	local_irq_save(flags);
+	llq.val = READ_ONCE(cmdq->q.llq.val);
+	do {
+		u64 old;
+
+		while (!queue_has_space(&llq, n + sync)) {
+			local_irq_restore(flags);
+			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+				dev_err_ratelimited(smmu->dev, "ECMDQ timeout\n");
+			local_irq_save(flags);
+		}
+
+		head.cons = llq.cons;
+		head.prod = queue_inc_prod_n(&llq, n + sync);
+
+		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+		if (old == llq.val)
+			break;
+
+		llq.val = old;
+	} while (1);
+
+	/* 2. Write our commands into the queue */
+	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+	if (sync) {
+		u64 cmd_sync[CMDQ_ENT_DWORDS];
+
+		prod = queue_inc_prod_n(&llq, n);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+	}
+
+	/* 3. Ensure that the commands are visible first */
+	dma_wmb();
+
+	/* 4. Advance the hardware prod pointer */
+	read_lock(&cmdq->q.ecmdq_lock);
+	writel_relaxed(head.prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+	read_unlock(&cmdq->q.ecmdq_lock);
+
+	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+	if (sync) {
+		llq.prod = queue_inc_prod_n(&llq, n);
+		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+		if (ret) {
+			dev_err_ratelimited(smmu->dev,
+					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+					    llq.prod,
+					    readl_relaxed(cmdq->q.prod_reg),
+					    readl_relaxed(cmdq->q.cons_reg));
+		}
+
+		/*
+		 * Update cmdq->q.llq.cons, to improve the success rate of
+		 * queue_has_space() when some new commands are inserted next
+		 * time.
+		 */
+		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+	}
+
+	local_irq_restore(flags);
+	return ret;
+}
+#endif
+
 /*
  * This is the actual insertion function, and provides the following
  * ordering guarantees to callers:
@@ -867,6 +950,11 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	struct arm_smmu_ll_queue llq, head;
 	int ret = 0;
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	if (!cmdq->shared)
+		return arm_smmu_ecmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+#endif
+
 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
 
 	/* 1. Allocate some space in the queue */
@@ -3103,6 +3191,9 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	cmdq->shared = 1;
+#endif
 	atomic_set(&cmdq->owner_prod, 0);
 	atomic_set(&cmdq->lock, 0);
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 3c531f8cd71f..4c5a8b774b25 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -576,6 +576,9 @@ struct arm_smmu_cmdq {
 	atomic_long_t *valid_map;
 	atomic_t owner_prod;
 	atomic_t lock;
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	int shared;
+#endif
 };
 
 #ifdef CONFIG_ARM_SMMU_V3_ECMDQ
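
A further note on steps 3 and 4 of arm_smmu_ecmdq_issue_cmdlist(): the dma_wmb() orders the command writes of step 2 before the prod update that publishes them, because the SMMU may begin fetching as soon as prod advances. The standalone sketch below expresses the same single-producer publish/consume contract using C11 release/acquire atomics in place of dma_wmb() + writel_relaxed(); it is illustrative only, and all names in it are hypothetical.

/*
 * Illustrative sketch only -- not part of the patch. A consumer that
 * observes the new prod must also observe the command words, so the
 * producer orders "write slots" before "publish prod".
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NENTS 16u

static uint64_t slots[NENTS][2];	/* CMDQ_ENT_DWORDS == 2 */
static _Atomic uint32_t prod;		/* published producer index */

static void publish(uint32_t idx, uint64_t lo, uint64_t hi)
{
	slots[idx % NENTS][0] = lo;	/* step 2: write the command */
	slots[idx % NENTS][1] = hi;
	/* steps 3+4: release store -- slot writes cannot pass this */
	atomic_store_explicit(&prod, idx + 1, memory_order_release);
}

static int consume(uint32_t *cons, uint64_t out[2])
{
	/* the acquire load pairs with the release store above */
	uint32_t p = atomic_load_explicit(&prod, memory_order_acquire);

	if (*cons == p)
		return 0;		/* queue empty */
	out[0] = slots[*cons % NENTS][0];
	out[1] = slots[*cons % NENTS][1];
	(*cons)++;
	return 1;
}

int main(void)
{
	uint32_t cons = 0;
	uint64_t cmd[2];

	publish(0, 0x1122334455667788ull, 0x0ull);
	if (consume(&cons, cmd))
		printf("consumed cmd[0]=%#llx\n", (unsigned long long)cmd[0]);
	return 0;
}

In the driver the acquire side is played by the hardware reading the prod register, so only the producer-side barrier appears in the patch.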