From: Jason Gunthorpe jgg@nvidia.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IB4WDJ CVE: NA
--------------------------------
For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting as the parent and a user provided STE fragment that defines the CD table and related data with addresses translated by the S2 iommu_domain.
The kernel only permits userspace to control certain allowed bits of the STE that are safe for user/guest control.
IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2 translation, but there is no way of knowing which S1 entries refer to a range of S2.
For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to flush all ASIDs from the VMID after flushing the S2 on any change to the S2.
The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the invalidation path relies on the VIOMMU to translate virtual stream ID used in the invalidation commands for the CD table and ATS.
Reviewed-by: Nicolin Chen nicolinc@nvidia.com Reviewed-by: Kevin Tian kevin.tian@intel.com Reviewed-by: Jerry Snitselaar jsnitsel@redhat.com Tested-by: Nicolin Chen nicolinc@nvidia.com Signed-off-by: Nicolin Chen nicolinc@nvidia.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com --- .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 163 ++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 17 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 26 +++ include/uapi/linux/iommufd.h | 20 +++ 4 files changed, 225 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 60dd9e907595..91247a2a2d2c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -30,7 +30,170 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) return info; }
+static void arm_smmu_make_nested_cd_table_ste( + struct arm_smmu_ste *target, struct arm_smmu_master *master, + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled) +{ + arm_smmu_make_s2_domain_ste( + target, master, nested_domain->vsmmu->s2_parent, ats_enabled); + + target->data[0] = cpu_to_le64(STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, + STRTAB_STE_0_CFG_NESTED)); + target->data[0] |= nested_domain->ste[0] & + ~cpu_to_le64(STRTAB_STE_0_CFG); + target->data[1] |= nested_domain->ste[1]; +} + +/* + * Create a physical STE from the virtual STE that userspace provided when it + * created the nested domain. Using the vSTE userspace can request: + * - Non-valid STE + * - Abort STE + * - Bypass STE (install the S2, no CD table) + * - CD table STE (install the S2 and the userspace CD table) + */ +static void arm_smmu_make_nested_domain_ste( + struct arm_smmu_ste *target, struct arm_smmu_master *master, + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled) +{ + unsigned int cfg = + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])); + + /* + * Userspace can request a non-valid STE through the nesting interface. + * We relay that into an abort physical STE with the intention that + * C_BAD_STE for this SID can be generated to userspace. + */ + if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) + cfg = STRTAB_STE_0_CFG_ABORT; + + switch (cfg) { + case STRTAB_STE_0_CFG_S1_TRANS: + arm_smmu_make_nested_cd_table_ste(target, master, nested_domain, + ats_enabled); + break; + case STRTAB_STE_0_CFG_BYPASS: + arm_smmu_make_s2_domain_ste(target, master, + nested_domain->vsmmu->s2_parent, + ats_enabled); + break; + case STRTAB_STE_0_CFG_ABORT: + default: + arm_smmu_make_abort_ste(target); + break; + } +} + +static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, + struct device *dev) +{ + struct arm_smmu_nested_domain *nested_domain = + to_smmu_nested_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_attach_state state = { + .master = master, + .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, + /* Currently invalidation of ATC is not supported */ + .disable_ats = true, + }; + struct arm_smmu_ste ste; + int ret; + + if (nested_domain->vsmmu->smmu != master->smmu) + return -EINVAL; + if (arm_smmu_ssids_in_use(&master->cd_table)) + return -EBUSY; + + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_attach_prepare(&state, domain); + if (ret) { + mutex_unlock(&arm_smmu_asid_lock); + return ret; + } + + arm_smmu_make_nested_domain_ste(&ste, master, nested_domain, + state.ats_enabled); + arm_smmu_install_ste_for_dev(master, &ste); + arm_smmu_attach_commit(&state); + mutex_unlock(&arm_smmu_asid_lock); + return 0; +} + +static void arm_smmu_domain_nested_free(struct iommu_domain *domain) +{ + kfree(to_smmu_nested_domain(domain)); +} + +static const struct iommu_domain_ops arm_smmu_nested_ops = { + .attach_dev = arm_smmu_attach_dev_nested, + .free = arm_smmu_domain_nested_free, +}; + +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg) +{ + unsigned int cfg; + + if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) { + memset(arg->ste, 0, sizeof(arg->ste)); + return 0; + } + + /* EIO is reserved for invalid STE data. */ + if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) || + (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED)) + return -EIO; + + cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0])); + if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS && + cfg != STRTAB_STE_0_CFG_S1_TRANS) + return -EIO; + return 0; +} + +static struct iommu_domain * +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; + struct arm_smmu_nested_domain *nested_domain; + struct iommu_hwpt_arm_smmuv3 arg; + int ret; + + /* + * Faults delivered to the nested domain are faults that originated by + * the S1 in the domain. The core code will match all PASIDs when + * delivering the fault due to user_pasid_table + */ + if (flags & ~SUPPORTED_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + + ret = iommu_copy_struct_from_user(&arg, user_data, + IOMMU_HWPT_DATA_ARM_SMMUV3, ste); + if (ret) + return ERR_PTR(ret); + + ret = arm_smmu_validate_vste(&arg); + if (ret) + return ERR_PTR(ret); + + nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT); + if (!nested_domain) + return ERR_PTR(-ENOMEM); + + nested_domain->domain.type = IOMMU_DOMAIN_NESTED; + nested_domain->domain.ops = &arm_smmu_nested_ops; + nested_domain->vsmmu = vsmmu; + nested_domain->ste[0] = arg.ste[0]; + nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS); + + return &nested_domain->domain; +} + static const struct iommufd_viommu_ops arm_vsmmu_ops = { + .alloc_domain_nested = arm_vsmmu_alloc_domain_nested, };
struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 33282873d8bc..9a4476f6c5b2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -377,6 +377,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) case CMDQ_OP_TLBI_NH_ASID: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); fallthrough; + case CMDQ_OP_TLBI_NH_ALL: case CMDQ_OP_TLBI_S12_VMALL: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); break; @@ -2434,6 +2435,15 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, } __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+ if (smmu_domain->nest_parent) { + /* + * When the S2 domain changes all the nested S1 ASIDs have to be + * flushed too. + */ + cmd.opcode = CMDQ_OP_TLBI_NH_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd); + } + /* * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. @@ -2869,6 +2879,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) if ((domain->type & __IOMMU_DOMAIN_PAGING) || domain->type == IOMMU_DOMAIN_SVA) return to_smmu_domain(domain); + if (domain->type == IOMMU_DOMAIN_NESTED) + return to_smmu_nested_domain(domain)->vsmmu->s2_parent; return NULL; }
@@ -2941,7 +2953,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * enabled if we have arm_smmu_domain, those always have page * tables. */ - state->ats_enabled = arm_smmu_ats_supported(master); + state->ats_enabled = !state->disable_ats && + arm_smmu_ats_supported(master); }
if (smmu_domain) { @@ -3347,6 +3360,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, goto err_free; } smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + smmu_domain->nest_parent = true; }
smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; @@ -3966,6 +3980,7 @@ static struct iommu_ops arm_smmu_ops = { .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .viommu_alloc = arm_vsmmu_alloc, + .user_pasid_table = 1, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 6f99f4fb5fc4..3bb9c157a6c8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -274,6 +274,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_0_CFG_BYPASS 4 #define STRTAB_STE_0_CFG_S1_TRANS 5 #define STRTAB_STE_0_CFG_S2_TRANS 6 +#define STRTAB_STE_0_CFG_NESTED 7
#define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4) #define STRTAB_STE_0_S1FMT_LINEAR 0 @@ -324,6 +325,15 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
+/* These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED */ +#define STRTAB_STE_0_NESTING_ALLOWED \ + cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \ + STRTAB_STE_0_S1CTXPTR_MASK | STRTAB_STE_0_S1CDMAX) +#define STRTAB_STE_1_NESTING_ALLOWED \ + cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | \ + STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | \ + STRTAB_STE_1_S1STALLD) + /* * Context descriptors. * @@ -543,6 +553,7 @@ struct arm_smmu_cmdq_ent { }; } cfgi;
+ #define CMDQ_OP_TLBI_NH_ALL 0x10 #define CMDQ_OP_TLBI_NH_ASID 0x11 #define CMDQ_OP_TLBI_NH_VA 0x12 #define CMDQ_OP_TLBI_EL2_ALL 0x20 @@ -860,6 +871,7 @@ struct arm_smmu_domain { struct list_head devices; spinlock_t devices_lock; bool enforce_cache_coherency : 1; + bool nest_parent : 1;
struct mmu_notifier mmu_notifier;
@@ -869,6 +881,13 @@ struct arm_smmu_domain { #endif };
+struct arm_smmu_nested_domain { + struct iommu_domain domain; + struct arm_vsmmu *vsmmu; + + __le64 ste[2]; +}; + /* The following are exposed for testing purposes. */ struct arm_smmu_entry_writer_ops; struct arm_smmu_entry_writer { @@ -913,6 +932,12 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) return container_of(dom, struct arm_smmu_domain, domain); }
+static inline struct arm_smmu_nested_domain * +to_smmu_nested_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct arm_smmu_nested_domain, domain); +} + extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock;
@@ -950,6 +975,7 @@ struct arm_smmu_attach_state { struct iommu_domain *old_domain; struct arm_smmu_master *master; bool cd_needs_ats; + bool disable_ats; ioasid_t ssid; /* Resulting state */ bool ats_enabled; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 27c5117db985..47ee35ce050b 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -396,6 +396,26 @@ struct iommu_hwpt_vtd_s1 { __u32 __reserved; };
+/** + * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE + * (IOMMU_HWPT_DATA_ARM_SMMUV3) + * + * @ste: The first two double words of the user space Stream Table Entry for + * the translation. Must be little-endian. + * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) + * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax + * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * + * -EIO will be returned if @ste is not legal or contains any non-allowed field. + * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass + * nested domain will translate the same as the nesting parent. The S1 will + * install a Context Descriptor Table pointing at userspace memory translated + * by the nesting parent. + */ +struct iommu_hwpt_arm_smmuv3 { + __aligned_le64 ste[2]; +}; + /** * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data