From: Jason Gunthorpe jgg@nvidia.com
Pull all the calculations for building the CD table entry for a mmu_struct into arm_smmu_make_sva_cd().
Call it in the two places installing the SVA CD table entry.
Open code the last caller of arm_smmu_update_ctx_desc_devices() and remove the function.
Remove arm_smmu_write_ctx_desc() since all callers are gone.
Remove quiet_cd since all users are gone, arm_smmu_make_sva_cd() creates the same value.
The behavior of quiet_cd changes slightly, the old implementation edited the CD in place to set CTXDESC_CD_0_TCR_EPD0 assuming it was a SVA CD entry. This version generates a full CD entry with a 0 TTB0 and relies on arm_smmu_write_cd_entry() to install it hitlessly.
Remove no_used_check since this was the flow that could result in an used bit inconsistent CD.
Tested-by: Nicolin Chen nicolinc@nvidia.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com
conflict with 5b658bc56069b: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 163 +++++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 103 +---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 - 3 files changed, 110 insertions(+), 161 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e90aa1dd71fd..5b135ad19252 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -34,25 +34,6 @@ struct arm_smmu_bond {
static DEFINE_MUTEX(sva_lock);
-/* - * Write the CD to the CD tables for all masters that this domain is attached - * to. Note that this is only used to update existing CD entries in the target - * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail. - */ -static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain, - int ssid, - struct arm_smmu_ctx_desc *cd) -{ - struct arm_smmu_master *master; - unsigned long flags; - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - arm_smmu_write_ctx_desc(master, ssid, cd); - } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); -} - static void arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { @@ -128,11 +109,91 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) return NULL; }
+static u64 page_size_to_cd(void) +{ + static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K || + PAGE_SIZE == SZ_64K); + if (PAGE_SIZE == SZ_64K) + return ARM_LPAE_TCR_TG0_64K; + if (PAGE_SIZE == SZ_16K) + return ARM_LPAE_TCR_TG0_16K; + return ARM_LPAE_TCR_TG0_4K; +} + +static void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, + struct arm_smmu_master *master, + struct mm_struct *mm, u16 asid) +{ + u64 par; + + memset(target, 0, sizeof(*target)); + + par = cpuid_feature_extract_unsigned_field( + read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1), + ID_AA64MMFR0_EL1_PARANGE_SHIFT); + + target->data[0] = cpu_to_le64( + CTXDESC_CD_0_TCR_EPD1 | +#ifdef __BIG_ENDIAN + CTXDESC_CD_0_ENDI | +#endif + CTXDESC_CD_0_V | + FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par) | + CTXDESC_CD_0_AA64 | + (master->stall_enabled ? CTXDESC_CD_0_S : 0) | + CTXDESC_CD_0_R | + CTXDESC_CD_0_A | + CTXDESC_CD_0_ASET | + FIELD_PREP(CTXDESC_CD_0_ASID, asid)); + + if (master->smmu->features & ARM_SMMU_FEAT_HD) + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HD); + if (master->smmu->features & ARM_SMMU_FEAT_HA) + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA); + + /* + * If no MM is passed then this creates a SVA entry that faults + * everything. arm_smmu_write_cd_entry() can hitlessly go between these + * two entries types since TTB0 is ignored by HW when EPD0 is set. + */ + if (mm) { + target->data[0] |= cpu_to_le64( + FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, + 64ULL - vabits_actual) | + FIELD_PREP(CTXDESC_CD_0_TCR_TG0, page_size_to_cd()) | + FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, + ARM_LPAE_TCR_RGN_WBWA) | + FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, + ARM_LPAE_TCR_RGN_WBWA) | + FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS)); + + target->data[1] = cpu_to_le64(virt_to_phys(mm->pgd) & + CTXDESC_CD_1_TTB0_MASK); + } else { + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_EPD0); + + /* + * Disable stall and immediately generate an abort if stall + * disable is permitted. This speeds up cleanup for an unclean + * exit if the device is still doing a lot of DMA. + */ + if (master->stall_enabled && + !(master->smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + target->data[0] &= + cpu_to_le64(~(CTXDESC_CD_0_S | CTXDESC_CD_0_R)); + } + + /* + * MAIR value is pretty much constant and global, so we can just get it + * from the current CPU register + */ + target->data[3] = cpu_to_le64(read_sysreg(mair_el1)); +} + static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) { u16 asid; int err = 0; - u64 tcr, par, reg; struct arm_smmu_ctx_desc *cd; struct arm_smmu_ctx_desc *ret = NULL;
@@ -166,41 +227,6 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) if (err) goto out_free_asid;
- /* HA and HD will be filtered out later if not supported by the SMMU */ - tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) | - FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) | - FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) | - FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) | - CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD | - CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; - - switch (PAGE_SIZE) { - case SZ_4K: - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_4K); - break; - case SZ_16K: - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_16K); - break; - case SZ_64K: - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_64K); - break; - default: - WARN_ON(1); - err = -EINVAL; - goto out_free_asid; - } - - reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par); - - cd->ttbr = virt_to_phys(mm->pgd); - cd->tcr = tcr; - /* - * MAIR value is pretty much constant and global, so we can just get it - * from the current CPU register - */ - cd->mair = read_sysreg(mair_el1); cd->asid = asid; cd->mm = mm;
@@ -278,6 +304,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_master *master; + unsigned long flags;
mutex_lock(&sva_lock); if (smmu_mn->cleared) { @@ -289,8 +317,19 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. */ - arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), - &quiet_cd); + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + struct arm_smmu_cd target; + struct arm_smmu_cd *cdptr; + + cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + if (WARN_ON(!cdptr)) + continue; + arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid); + arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr, + &target); + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); @@ -385,6 +424,8 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, struct mm_struct *mm) { int ret; + struct arm_smmu_cd target; + struct arm_smmu_cd *cdptr; struct arm_smmu_bond *bond; struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -411,9 +452,13 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, goto err_free_bond; }
- ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd); - if (ret) + cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + if (!cdptr) { + ret = -ENOMEM; goto err_put_notifier; + } + arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); + arm_smmu_write_cd_entry(master, pasid, cdptr, &target);
list_add(&bond->list, &master->bonds); return 0; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 723d41ae8105..ad37423f7323 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -115,7 +115,6 @@ struct arm_smmu_entry_writer { struct arm_smmu_entry_writer_ops { unsigned int num_entry_qwords; __le64 v_bit; - bool no_used_check; void (*get_used)(const __le64 *entry, __le64 *used); void (*sync)(struct arm_smmu_entry_writer *writer); }; @@ -150,12 +149,6 @@ struct arm_smmu_option_prop { DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa); DEFINE_MUTEX(arm_smmu_asid_lock);
-/* - * Special value used by SVA when a process dies, to quiesce a CD without - * disabling it. - */ -struct arm_smmu_ctx_desc quiet_cd = { 0 }; - static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, @@ -1271,8 +1264,7 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer, * allowed to set a bit to 1 if the used function doesn't say it * is used. */ - if (!writer->ops->no_used_check) - WARN_ON_ONCE(target[i] & ~target_used[i]); + WARN_ON_ONCE(target[i] & ~target_used[i]);
/* Bits can change because they are not currently being used */ unused_update[i] = (entry[i] & cur_used[i]) | @@ -1281,8 +1273,7 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer, * Each bit indicates that a used bit in a qword needs to be * changed after unused_update is applied. */ - if ((unused_update[i] & target_used[i]) != - (target[i] & target_used[i])) + if ((unused_update[i] & target_used[i]) != target[i]) used_qword_diff |= 1 << i; } return used_qword_diff; @@ -1378,11 +1369,8 @@ static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, * in the entry. The target was already sanity checked by * compute_qword_diff(). */ - if (writer->ops->no_used_check) - entry_set(writer, entry, target, 0, num_entry_qwords); - else - WARN_ON_ONCE(entry_set(writer, entry, target, 0, - num_entry_qwords)); + WARN_ON_ONCE( + entry_set(writer, entry, target, 0, num_entry_qwords)); } }
@@ -1433,7 +1421,7 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
- /* See comment in arm_smmu_write_ctx_desc() */ + /* The HW has 64 bit atomicity with stores to the L2 CD table */ WRITE_ONCE(*dst, cpu_to_le64(val)); }
@@ -1504,7 +1492,6 @@ static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = { .sync = arm_smmu_cd_writer_sync_entry, .get_used = arm_smmu_get_cd_used, .v_bit = cpu_to_le64(CTXDESC_CD_0_V), - .no_used_check = true, .num_entry_qwords = sizeof(struct arm_smmu_cd) / sizeof(u64), };
@@ -1567,83 +1554,6 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) arm_smmu_write_cd_entry(master, ssid, cdptr, &target); }
-int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, - struct arm_smmu_ctx_desc *cd) -{ - /* - * This function handles the following cases: - * - * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). - * (2) Install a secondary CD, for SID+SSID traffic. - * (3) Update ASID of a CD. Atomically write the first 64 bits of the - * CD, then invalidate the old entry and mappings. - * (4) Quiesce the context without clearing the valid bit. Disable - * translation, and ignore any translation fault. - * (5) Remove a secondary CD. - */ - u64 val; - bool cd_live; - struct arm_smmu_cd *cdptr; - struct arm_smmu_cd target; - struct arm_smmu_cd *cdptr = ⌖ - struct arm_smmu_cd *cd_table_entry; - struct arm_smmu_device *smmu = master->smmu; - struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - - if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) - return -E2BIG; - - cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); - if (!cd_table_entry) - return -ENOMEM; - - target = *cd_table_entry; - val = le64_to_cpu(cdptr->data[0]); - cd_live = !!(val & CTXDESC_CD_0_V); - - if (!cd) { /* (5) */ - val = 0; - } else if (cd == &quiet_cd) { /* (4) */ - if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) - val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); - val |= CTXDESC_CD_0_TCR_EPD0; - } else if (cd_live) { /* (3) */ - val &= ~CTXDESC_CD_0_ASID; - val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); - /* - * Until CD+TLB invalidation, both ASIDs may be used for tagging - * this substream's traffic - */ - } else { /* (1) and (2) */ - u64 tcr = cd->tcr; - - cdptr->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); - cdptr->data[2] = 0; - cdptr->data[3] = cpu_to_le64(cd->mair); - - if (!(smmu->features & ARM_SMMU_FEAT_HD)) - tcr &= ~CTXDESC_CD_0_TCR_HD; - if (!(smmu->features & ARM_SMMU_FEAT_HA)) - tcr &= ~CTXDESC_CD_0_TCR_HA; - - val = tcr | -#ifdef __BIG_ENDIAN - CTXDESC_CD_0_ENDI | -#endif - CTXDESC_CD_0_R | CTXDESC_CD_0_A | - (cd->mm ? 0 : CTXDESC_CD_0_ASET) | - CTXDESC_CD_0_AA64 | - FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | - CTXDESC_CD_0_V; - - if (cd_table->stall_enabled) - val |= CTXDESC_CD_0_S; - } - cdptr->data[0] = cpu_to_le64(val); - arm_smmu_write_cd_entry(master, ssid, cd_table_entry, &target); - return 0; -} - static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) { int ret; @@ -1652,7 +1562,6 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- cd_table->stall_enabled = master->stall_enabled; cd_table->s1cdmax = master->ssid_bits; max_contexts = 1 << cd_table->s1cdmax;
@@ -1750,7 +1659,7 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
- /* See comment in arm_smmu_write_ctx_desc() */ + /* The HW has 64 bit atomicity with stores to the L2 STE table */ WRITE_ONCE(*dst, cpu_to_le64(val)); }
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 0fb18cc8cbeb..4374703eae52 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -655,8 +655,6 @@ struct arm_smmu_ctx_desc_cfg { u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; - /* Whether CD entries in this table have the stall bit set. */ - u8 stall_enabled:1; };
struct arm_smmu_s2_cfg { @@ -812,7 +810,6 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; -extern struct arm_smmu_ctx_desc quiet_cd;
void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, @@ -824,8 +821,6 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target);
-int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid, - struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf,