From: Zhen Lei <thunder.leizhen@huawei.com>
hulk inclusion
category: feature
bugzilla: 174251
CVE: NA
-------------------------------------------------------------------------
The key to the performance optimization in commit 587e6c10a7ce ("iommu/arm-smmu-v3: Reduce contention during command-queue insertion") is that it allows multiple cores to insert commands in parallel after only a brief period of mutex contention.
It follows that inserting as many commands as possible in a single call reduces how often cores must contend for the mutex, and therefore improves overall performance; at a minimum, it reduces the number of calls to arm_smmu_cmdq_issue_cmdlist().
Therefore, add a new helper, arm_smmu_cmdq_issue_cmd_with_sync(), which inserts a command and its trailing CMD_SYNC in a single call.
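For example, at each call site converted below, the previous two-step pattern collapses into a single queue insertion (caller-side sketch; 'cmd' stands for whatever arm_smmu_cmdq_ent the caller has already built):

	/* before: two insertions, two chances to contend for the queue */
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* after: one insertion carries both the command and its CMD_SYNC */
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);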
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +++++++++++++--------
 1 file changed, 21 insertions(+), 12 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index abde4b9f3c88..f1b6597b1b0e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -856,11 +856,25 @@ static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
 }

-static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+static int __maybe_unused arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
 }

+static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
+					     struct arm_smmu_cmdq_ent *ent)
+{
+	u64 cmd[CMDQ_ENT_DWORDS];
+
+	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
+		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+			 ent->opcode);
+		return -EINVAL;
+	}
+
+	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, true);
+}
+
 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
 				    struct arm_smmu_cmdq_batch *cmds,
 				    struct arm_smmu_cmdq_ent *cmd)
@@ -949,8 +963,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 		.tlbi.asid = asid,
 	};

-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }

 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
@@ -1246,8 +1259,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
 		},
 	};

-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }

 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
@@ -2047,8 +2059,7 @@ static void __arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain,
 	} else {
 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
-		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-		arm_smmu_cmdq_issue_sync(smmu);
+		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 	}
 	if (smmu_domain->parent)
 		arm_smmu_atc_inv_domain(smmu_domain->parent, smmu_domain->ssid,
@@ -4387,18 +4398,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)

 	/* Invalidate any cached configuration */
 	cmd.opcode = CMDQ_OP_CFGI_ALL;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

 	/* Invalidate any stale TLB entries */
 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
-		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 	}

 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

 	/* Event queue */
 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);