From: Jason Gunthorpe jgg@nvidia.com
BTM support is a feature where the CPU TLB invalidation can be forwarded to the IOMMU and also invalidate the IOTLB. For this to work the CPU and IOMMU ASID must be the same.
Retain the prior SVA design here of keeping the ASID allocator for the IOMMU private to SMMU and force SVA domains to set an ASID that matches the CPU ASID.
This requires changing the ASID assigned to a S1 domain if it happens to be overlapping with the required CPU ASID. We hold on to the CPU ASID so long as the SVA iommu_domain exists, so SVA domain conflict is not possible.
With the asid per-smmu we no longer have a problem that two per-smmu iommu_domain's would need to share a CPU ASID entry in the IOMMU's xarray.
Use the same ASID move algorithm for the S1 domains as before with some streamlining around how the xarray is being used. Do not synchronize the ASID's if BTM mode is not supported. Just leave BTM features off everywhere.
Audit all the places that touch cd->asid and think carefully about how the locking works with the change to the cd->asid by the move algorithm. Use xarray internal locking during xa_alloc() instead of double locking. Add a note that concurrent S1 invalidation doesn't fully work.
Note that this is all still dead code, ARM_SMMU_FEAT_BTM is never set.
Tested-by: Nicolin Chen nicolinc@nvidia.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 133 ++++++++++++++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +- 3 files changed, 129 insertions(+), 21 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index f49ba81c25f0..e35ae178a7b1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -14,12 +14,33 @@
static DEFINE_MUTEX(sva_lock);
-static void __maybe_unused -arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) +static int arm_smmu_realloc_s1_domain_asid(struct arm_smmu_device *smmu, + struct arm_smmu_domain *smmu_domain) { struct arm_smmu_master_domain *master_domain; + u32 old_asid = smmu_domain->cd.asid; struct arm_smmu_cd target_cd; unsigned long flags; + int ret; + + lockdep_assert_held(&smmu->asid_lock); + + /* + * FIXME: The unmap and invalidation path doesn't take any locks but + * this is not fully safe. Since updating the CD tables is not atomic + * there is always a hole where invalidating only one ASID of two active + * ASIDs during unmap will cause the IOTLB to become stale. + * + * This approach is to hopefully shift the racing CPUs to the new ASID + * before we start programming the HW. This increases the chance that + * racing IOPTE changes will pick up an invalidation for the new ASID + * and we achieve eventual consistency. For the brief period where the + * old ASID is still in the CD entries it will become incoherent. + */ + ret = xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid, smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + if (ret) + return ret;
spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { @@ -35,6 +56,10 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) &target_cd); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + /* Clean the ASID we are about to assign to a new translation */ + arm_smmu_tlb_inv_asid(smmu, old_asid); + return 0; }
static u64 page_size_to_cd(void) @@ -152,12 +177,12 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, }
if (!smmu_domain->btm_invalidation) { + ioasid_t asid = READ_ONCE(smmu_domain->cd.asid); + if (!size) - arm_smmu_tlb_inv_asid(smmu_domain->smmu, - smmu_domain->cd.asid); + arm_smmu_tlb_inv_asid(smmu_domain->smmu, asid); else - arm_smmu_tlb_inv_range_asid(start, size, - smmu_domain->cd.asid, + arm_smmu_tlb_inv_range_asid(start, size, asid, PAGE_SIZE, false, smmu_domain); } @@ -186,6 +211,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; + + /* An SVA ASID never changes, no asid_lock required */ arm_smmu_make_sva_cd(&target, master, NULL, smmu_domain->cd.asid, smmu_domain->btm_invalidation); @@ -381,6 +408,8 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain) * unnecessary invalidation. */ arm_smmu_domain_free_id(smmu_domain); + if (smmu_domain->btm_invalidation) + arm64_mm_context_put(domain->mm);
/* * Actual free is defered to the SRCU callback @@ -394,13 +423,97 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { .free = arm_smmu_sva_domain_free };
+static int arm_smmu_share_asid(struct arm_smmu_device *smmu, + struct arm_smmu_domain *smmu_domain, + struct mm_struct *mm) +{ + struct arm_smmu_domain *old_s1_domain; + int ret; + + /* + * Notice that BTM is never currently enabled, this is all dead code. + * The specification cautions: + * + * Note: Arm expects that SMMU stage 2 address spaces are generally + * shared with their respective PE virtual machine stage 2 + * configuration. If broadcast invalidation is required to be avoided + * for a particular SMMU stage 2 address space, Arm recommends that a + * hypervisor configures the STE with a VMID that is not allocated for + * virtual machine use on the PEs + * + * However, in Linux, both KVM and SMMU think they own the VMID pool. + * Unfortunately the ARM design is problematic for Linux as we do not + * currently share the S2 table with KVM. This creates a situation where + * the S2 needs to have the same VMID as KVM in order to allow the guest + * to use BTM, however we must still invalidate the S2 directly since it + * is a different radix tree. What Linux would like is something like + * ASET for the STE to disable BTM only for the S2. + * + * Arguably in a system with BTM the driver should prefer to use a S1 + * table in all cases execpt when explicitly asked to create a nesting + * parent. Then it should use the VMID of KVM to enable BTM in the + * guest. We cannot optimize away the resulting double invalidation of + * the S2 :( Or we simply ignore BTM entirely as we are doing now. + */ + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) + return xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid, + smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), + GFP_KERNEL); + + /* At this point the caller ensures we have a mmget() */ + smmu_domain->cd.asid = arm64_mm_context_get(mm); + + mutex_lock(&smmu->asid_lock); + old_s1_domain = xa_store(&smmu->asid_map, smmu_domain->cd.asid, + smmu_domain, GFP_KERNEL); + if (xa_err(old_s1_domain)) { + ret = xa_err(old_s1_domain); + goto out_put_asid; + } + + /* + * In BTM mode the CPU ASID and the IOMMU ASID have to be the same. + * Unfortunately we run separate allocators for this and the IOMMU + * ASID can already have been assigned to a S1 domain. SVA domains + * always align to their CPU ASIDs. In this case we change + * the S1 domain's ASID, update the CD entry and flush the caches. + * + * This is a bit tricky, all the places writing to a S1 CD, reading the + * S1 ASID, or doing xa_erase must hold the asid_lock or xa_lock to + * avoid IOTLB incoherence. + */ + if (old_s1_domain) { + if (WARN_ON(old_s1_domain->domain.type == IOMMU_DOMAIN_SVA)) { + ret = -EINVAL; + goto out_restore_s1; + } + ret = arm_smmu_realloc_s1_domain_asid(smmu, old_s1_domain); + if (ret) + goto out_restore_s1; + } + + smmu_domain->btm_invalidation = true; + + ret = 0; + goto out_unlock; + +out_restore_s1: + xa_store(&smmu->asid_map, smmu_domain->cd.asid, old_s1_domain, + GFP_KERNEL); +out_put_asid: + arm64_mm_context_put(mm); +out_unlock: + mutex_unlock(&smmu->asid_lock); + return ret; +} + struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_domain *smmu_domain; - u32 asid; int ret;
smmu_domain = arm_smmu_domain_alloc(); @@ -411,12 +524,10 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; smmu_domain->smmu = smmu;
- ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain, - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + ret = arm_smmu_share_asid(smmu, smmu_domain, mm); if (ret) goto err_free;
- smmu_domain->cd.asid = asid; smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops; ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm); if (ret) @@ -426,6 +537,8 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
err_asid: arm_smmu_domain_free_id(smmu_domain); + if (smmu_domain->btm_invalidation) + arm64_mm_context_put(mm); err_free: kfree(smmu_domain); return ERR_PTR(ret); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6df98d8d7aeb..fdf5ad269769 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1528,6 +1528,8 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+ lockdep_assert_held(&master->smmu->asid_lock); + memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64( @@ -2291,7 +2293,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) * careful, 007. */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); + arm_smmu_tlb_inv_asid(smmu, READ_ONCE(smmu_domain->cd.asid)); } else { cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -2538,17 +2540,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain) { - int ret; - u32 asid; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
- /* Prevent SVA from modifying the ASID until it is written to the CD */ - mutex_lock(&smmu->asid_lock); - ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain, - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); - cd->asid = (u16)asid; - mutex_unlock(&smmu->asid_lock); - return ret; + return xa_alloc(&smmu->asid_map, &cd->asid, smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); }
static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 3992047fcb71..1cd4161c329e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -633,7 +633,7 @@ struct arm_smmu_strtab_l1_desc { };
struct arm_smmu_ctx_desc { - u16 asid; + u32 asid; };
struct arm_smmu_l1_ctx_desc {