From: Jason Gunthorpe jgg@nvidia.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IB4WDJ CVE: NA
--------------------------------
The EATS flag needs to flow through the vSTE and into the pSTE, and ensure physical ATS is enabled on the PCI device.
The physical ATS state must match the VM's idea of EATS as we rely on the VM to issue the ATS invalidation commands. Thus ATS must remain off at the device until EATS on a nesting domain turns it on. Attaching a nesting domain is the point where the invalidation responsibility transfers to userspace.
Update the ATS logic to track EATS for nesting domains and flush the ATC whenever the S2 nesting parent changes.
Signed-off-by: Nicolin Chen nicolinc@nvidia.com Tested-by: Nicolin Chen nicolinc@nvidia.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com --- .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 31 ++++++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 +++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++- include/uapi/linux/iommufd.h | 2 +- 4 files changed, 53 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index a1c8fcd4797c..84c8a21c00ae 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -95,8 +95,6 @@ static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, .master = master, .old_domain = iommu_get_domain_for_dev(dev), .ssid = IOMMU_NO_PASID, - /* Currently invalidation of ATC is not supported */ - .disable_ats = true, }; struct arm_smmu_ste ste; int ret; @@ -107,6 +105,15 @@ static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, return -EBUSY;
mutex_lock(&arm_smmu_asid_lock); + /* + * The VM has to control the actual ATS state at the PCI device because + * we forward the invalidations directly from the VM. If the VM doesn't + * think ATS is on it will not generate ATC flushes and the ATC will + * become incoherent. Since we can't access the actual virtual PCI ATS + * config bit here base this off the EATS value in the STE. If the EATS + * is set then the VM must generate ATC flushes. + */ + state.disable_ats = !nested_domain->enable_ats; ret = arm_smmu_attach_prepare(&state, domain); if (ret) { mutex_unlock(&arm_smmu_asid_lock); @@ -131,8 +138,10 @@ static const struct iommu_domain_ops arm_smmu_nested_ops = { .free = arm_smmu_domain_nested_free, };
-static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg) +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg, + bool *enable_ats) { + unsigned int eats; unsigned int cfg;
if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) { @@ -149,6 +158,18 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg) if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS && cfg != STRTAB_STE_0_CFG_S1_TRANS) return -EIO; + + /* + * Only Full ATS or ATS UR is supported + * The EATS field will be set by arm_smmu_make_nested_domain_ste() + */ + eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1])); + arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS); + if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS) + return -EIO; + + if (cfg == STRTAB_STE_0_CFG_S1_TRANS) + *enable_ats = (eats == STRTAB_STE_1_EATS_TRANS); return 0; }
@@ -160,6 +181,7 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; struct arm_smmu_nested_domain *nested_domain; struct iommu_hwpt_arm_smmuv3 arg; + bool enable_ats = false; int ret;
/* @@ -175,7 +197,7 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, if (ret) return ERR_PTR(ret);
- ret = arm_smmu_validate_vste(&arg); + ret = arm_smmu_validate_vste(&arg, &enable_ats); if (ret) return ERR_PTR(ret);
@@ -185,6 +207,7 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
nested_domain->domain.type = IOMMU_DOMAIN_NESTED; nested_domain->domain.ops = &arm_smmu_nested_ops; + nested_domain->enable_ats = enable_ats; nested_domain->vsmmu = vsmmu; nested_domain->ste[0] = arg.ste[0]; nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2ca079df2fa2..0cdb8643aebd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2306,7 +2306,16 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!master->ats_enabled) continue;
- arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd); + if (master_domain->nested_ats_flush) { + /* + * If a S2 used as a nesting parent is changed we have + * no option but to completely flush the ATC. + */ + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + } else { + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, + &cmd); + }
for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; @@ -2856,7 +2865,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, struct arm_smmu_master *master, - ioasid_t ssid) + ioasid_t ssid, bool nested_ats_flush) { struct arm_smmu_master_domain *master_domain;
@@ -2865,7 +2874,8 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { if (master_domain->master == master && - master_domain->ssid == ssid) + master_domain->ssid == ssid && + master_domain->nested_ats_flush == nested_ats_flush) return master_domain; } return NULL; @@ -2896,13 +2906,18 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, { struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; + bool nested_ats_flush = false; unsigned long flags;
if (!smmu_domain) return;
+ if (domain->type == IOMMU_DOMAIN_NESTED) + nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats; + spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid, + nested_ats_flush); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); @@ -2969,6 +2984,9 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, return -ENOMEM; master_domain->master = master; master_domain->ssid = state->ssid; + if (new_domain->type == IOMMU_DOMAIN_NESTED) + master_domain->nested_ats_flush = + to_smmu_nested_domain(new_domain)->enable_ats;
/* * During prepare we want the current smmu_domain and new diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 900cc4d341c6..8b3dac344213 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -334,7 +334,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_NESTING_ALLOWED \ cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | \ STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | \ - STRTAB_STE_1_S1STALLD) + STRTAB_STE_1_S1STALLD | STRTAB_STE_1_EATS)
/* * Context descriptors. @@ -887,6 +887,7 @@ struct arm_smmu_domain { struct arm_smmu_nested_domain { struct iommu_domain domain; struct arm_vsmmu *vsmmu; + bool enable_ats : 1;
__le64 ste[2]; }; @@ -928,6 +929,7 @@ struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; ioasid_t ssid; + bool nested_ats_flush : 1; };
static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 47ee35ce050b..125b51b78ad8 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -404,7 +404,7 @@ struct iommu_hwpt_vtd_s1 { * the translation. Must be little-endian. * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax - * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD * * -EIO will be returned if @ste is not legal or contains any non-allowed field. * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass