From: Jason Gunthorpe <jgg@nvidia.com>
The new arm_smmu_tlb_inv_range_s1() automatically picks the right set of invalidation commands, reusing the limiting logic that SVA already had: if the limit is exceeded, a single full S1 ASID invalidation is issued instead of a long stream of per-page range invalidations.
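The threshold is the MAX_TLBI_OPS heuristic cloned from arch/arm64/include/asm/tlbflush.h. As a rough, userspace-compilable sketch of the decision (illustrative names; the ARM_SMMU_FEAT_RANGE_INV short-circuit in the real arm_smmu_inv_range_too_big() below is omitted):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>

	/* power-of-two log2, standing in for the kernel's ilog2() */
	static unsigned int granule_log2(size_t granule)
	{
		unsigned int log = 0;

		while (granule >>= 1)
			log++;
		return log;
	}

	/*
	 * true when per-page TLBIs would need too many commands and a
	 * full ASID invalidation should be issued instead
	 */
	static bool inv_range_too_big(size_t size, size_t granule)
	{
		size_t max_ops;

		/* a zero (or "everything") size always means invalidate all */
		if (!size || size == SIZE_MAX)
			return true;

		/* e.g. 4K granule: max_ops = 1 << (12 - 3) = 512 commands */
		max_ops = (size_t)1 << (granule_log2(granule) - 3);
		return size >= max_ops * granule;
	}

With 4K pages the cutover is 512 page invalidations (2MB), the same point SVA used via CMDQ_MAX_TLBI_OPS.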
Reimplement arm_smmu_tlb_inv_all_s1() as a static inline that calls this new function with a zero size, which the limiting logic treats as a full invalidation.
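For reference, the resulting inline in arm-smmu-v3.h collapses to a single zero-sized (i.e. invalidate-all) call:

	static inline void arm_smmu_tlb_inv_all_s1(struct arm_smmu_domain *smmu_domain)
	{
		arm_smmu_tlb_inv_range_s1(smmu_domain, 0, 0, PAGE_SIZE, false);
	}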
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 32 ++----
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 52 +++++++++++++------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 11 ++--
 3 files changed, 48 insertions(+), 47 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a330c78fd9de..0334b0a8b5b9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -140,15 +140,6 @@ static void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
 	target->data[3] = cpu_to_le64(read_sysreg(mair_el1));
 }
 
-/*
- * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
- * is used as a threshold to replace per-page TLBI commands to issue in the
- * command queue with an address-space TLBI command, when SMMU w/o a range
- * invalidation feature handles too many per-page TLBI commands, which will
- * otherwise result in a soft lockup.
- */
-#define CMDQ_MAX_TLBI_OPS		(1 << (PAGE_SHIFT - 3))
-
 static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
 						struct mm_struct *mm,
 						unsigned long start,
@@ -164,25 +155,12 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
 	 * range. So do a simple translation here by calculating size correctly.
 	 */
 	size = end - start;
-	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
-		if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
-			size = 0;
-	} else {
-		if (size == ULONG_MAX)
-			size = 0;
-	}
-
-	if (!smmu_domain->btm_invalidation) {
-		ioasid_t asid = READ_ONCE(smmu_domain->asid);
-
-		if (!size)
-			arm_smmu_tlb_inv_all_s1(smmu_domain);
-		else
-			arm_smmu_tlb_inv_range_asid(start, size, asid,
-						    PAGE_SIZE, false,
-						    smmu_domain);
-	}
+	if (size == ULONG_MAX)
+		size = 0;
+
+	if (!smmu_domain->btm_invalidation)
+		arm_smmu_tlb_inv_range_s1(smmu_domain, start, size, PAGE_SIZE,
+					  false);
 
 	arm_smmu_atc_inv_domain(smmu_domain, start, size);
 }
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 588cf7e8448d..df258c44131f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1183,17 +1183,6 @@ static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused
 }
 
 /* Context descriptor manipulation functions */
-void arm_smmu_tlb_inv_all_s1(struct arm_smmu_domain *smmu_domain)
-{
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_cmdq_ent cmd = {
-		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
-				CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
-		.tlbi.asid = READ_ONCE(smmu_domain->asid),
-	};
-
-	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
-}
 
 /*
  * Based on the value of ent report which bits of the STE the HW will access. It
@@ -2376,6 +2365,29 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
 	arm_smmu_preempt_enable(smmu);
 }
 
+static bool arm_smmu_inv_range_too_big(struct arm_smmu_device *smmu,
+				       size_t size, size_t granule)
+{
+	unsigned int max_ops;
+
+	/* 0 size means invalidate all */
+	if (!size || size == SIZE_MAX)
+		return true;
+
+	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV)
+		return false;
+
+	/*
+	 * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h,
+	 * this is used as a threshold to replace per-page TLBI commands to
+	 * issue in the command queue with an address-space TLBI command, when
+	 * SMMU w/o a range invalidation feature handles too many per-page TLBI
+	 * commands, which will otherwise result in a soft lockup.
+	 */
+	max_ops = 1 << (ilog2(granule) - 3);
+	return size >= max_ops * granule;
+}
+
 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
 					  size_t granule, bool leaf,
 					  struct arm_smmu_domain *smmu_domain)
@@ -2403,20 +2415,28 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
 	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
 }
 
-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
-				 size_t granule, bool leaf,
-				 struct arm_smmu_domain *smmu_domain)
+void arm_smmu_tlb_inv_range_s1(struct arm_smmu_domain *smmu_domain,
+			       unsigned long iova, size_t size,
+			       size_t granule, bool leaf)
 {
 	struct arm_smmu_cmdq_ent cmd = {
 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
 		.tlbi = {
-			.asid	= asid,
+			.asid	= READ_ONCE(smmu_domain->asid),
 			.leaf	= leaf,
 		},
 	};
 
-	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+	if (arm_smmu_inv_range_too_big(smmu_domain->smmu, size, granule)) {
+		cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+				     CMDQ_OP_TLBI_EL2_ASID :
+				     CMDQ_OP_TLBI_NH_ASID;
+		arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
+	} else {
+		__arm_smmu_tlb_inv_range(&cmd, iova, size, granule,
+					 smmu_domain);
+	}
 }
 
 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 9d1d441c55cf..8315a2f4e661 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -843,10 +843,13 @@ void arm_smmu_remove_pasid(struct arm_smmu_master *master,
 int arm_smmu_domain_alloc_id(struct arm_smmu_device *smmu,
 			     struct arm_smmu_domain *smmu_domain);
 void arm_smmu_domain_free_id(struct arm_smmu_domain *smmu_domain);
-void arm_smmu_tlb_inv_all_s1(struct arm_smmu_domain *smmu_domain);
-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
-				 size_t granule, bool leaf,
-				 struct arm_smmu_domain *smmu_domain);
+void arm_smmu_tlb_inv_range_s1(struct arm_smmu_domain *smmu_domain,
+			       unsigned long iova, size_t size, size_t granule,
+			       bool leaf);
+static inline void arm_smmu_tlb_inv_all_s1(struct arm_smmu_domain *smmu_domain)
+{
+	arm_smmu_tlb_inv_range_s1(smmu_domain, 0, 0, PAGE_SIZE, false);
+}
 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
 			    unsigned long iova, size_t size);