From: Jason Gunthorpe <jgg@nvidia.com>
CD table entries and STEs have the same essential programming sequence, just with different types and sizes.
Have arm_smmu_write_ctx_desc() generate a target CD and call arm_smmu_write_entry() to do the programming. Because the target CD is generated by modifying the existing CD, this alone is not enough to free the CD callers from their ordering requirements.
The following patches will make the rest of the CD flow mirror the STE flow with precise CD contents generated in all cases.
Until all of the CD generation is moved to the new method, the logic can't ensure that the CD always conforms to the used-bits requirements. Add a temporary no_used_check flag to disable the assertions in the meantime.
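For reference, the shared sequence both entry types now go through can be sketched as below. This is an illustrative summary of the used-qword logic in arm_smmu_entry_qword_diff()/arm_smmu_write_entry(), not the literal driver code; it assumes kernel types from <linux/types.h> and hweight8() from <linux/bitops.h>:

  /*
   * Sketch: can the update from cur to target be done without ever
   * presenting an invalid (V=0) entry to the SMMU?
   */
  static bool update_is_hitless(const __le64 *cur, const __le64 *target,
                                const __le64 *cur_used,
                                const __le64 *target_used,
                                unsigned int num_qwords)
  {
          u8 used_qword_diff = 0;
          unsigned int i;

          for (i = 0; i != num_qwords; i++) {
                  /* The qword after changing only bits the HW ignores */
                  __le64 unused_update = (cur[i] & cur_used[i]) |
                                         (target[i] & ~cur_used[i]);

                  /* A used bit would still have to change afterwards */
                  if ((unused_update & target_used[i]) !=
                      (target[i] & target_used[i]))
                          used_qword_diff |= 1 << i;
          }

          /* At most one qword of used bits may change hitlessly */
          return hweight8(used_qword_diff) <= 1;
  }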
Signed-off-by: Michael Shavit <mshavit@google.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 100 +++++++++++++++-----
 1 file changed, 74 insertions(+), 26 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b406ad5e9975..4e575749a0d6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -115,11 +115,14 @@ struct arm_smmu_entry_writer {
 struct arm_smmu_entry_writer_ops {
 	unsigned int num_entry_qwords;
 	__le64 v_bit;
+	bool no_used_check;
 	void (*get_used)(const __le64 *entry, __le64 *used);
 	void (*sync)(struct arm_smmu_entry_writer *writer);
 };
 
-#define NUM_ENTRY_QWORDS (sizeof(struct arm_smmu_ste) / sizeof(u64))
+#define NUM_ENTRY_QWORDS                                                \
+	(max(sizeof(struct arm_smmu_ste), sizeof(struct arm_smmu_cd)) / \
+	 sizeof(u64))
 
 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
 	[EVTQ_MSI_INDEX] = {
@@ -1267,7 +1270,8 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
 		 * allowed to set a bit to 1 if the used function doesn't say it
 		 * is used.
 		 */
-		WARN_ON_ONCE(target[i] & ~target_used[i]);
+		if (!writer->ops->no_used_check)
+			WARN_ON_ONCE(target[i] & ~target_used[i]);
 
 		/* Bits can change because they are not currently being used */
 		unused_update[i] = (entry[i] & cur_used[i]) |
@@ -1276,7 +1280,8 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
 		 * Each bit indicates that a used bit in a qword needs to be
 		 * changed after unused_update is applied.
 		 */
-		if ((unused_update[i] & target_used[i]) != target[i])
+		if ((unused_update[i] & target_used[i]) !=
+		    (target[i] & target_used[i]))
 			used_qword_diff |= 1 << i;
 	}
 	return used_qword_diff;
@@ -1372,8 +1377,11 @@ static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer,
 		 * in the entry. The target was already sanity checked by
 		 * compute_qword_diff().
 		 */
-		WARN_ON_ONCE(
-			entry_set(writer, entry, target, 0, num_entry_qwords));
+		if (writer->ops->no_used_check)
+			entry_set(writer, entry, target, 0, num_entry_qwords);
+		else
+			WARN_ON_ONCE(entry_set(writer, entry, target, 0,
+					       num_entry_qwords));
 	}
 }
 
@@ -1456,6 +1464,59 @@ static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
 	return &l1_desc->l2ptr[idx];
 }
 
+struct arm_smmu_cd_writer {
+	struct arm_smmu_entry_writer writer;
+	unsigned int ssid;
+};
+
+static void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
+{
+	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
+	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
+		return;
+	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
+
+	/* EPD0 means T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED */
+	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
+		used_bits[0] &= ~cpu_to_le64(
+			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
+			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
+			CTXDESC_CD_0_TCR_SH0);
+		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
+	}
+}
+
+static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
+{
+	struct arm_smmu_cd_writer *cd_writer =
+		container_of(writer, struct arm_smmu_cd_writer, writer);
+
+	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
+}
+
+static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
+	.sync = arm_smmu_cd_writer_sync_entry,
+	.get_used = arm_smmu_get_cd_used,
+	.v_bit = cpu_to_le64(CTXDESC_CD_0_V),
+	.no_used_check = true,
+	.num_entry_qwords = sizeof(struct arm_smmu_cd) / sizeof(u64),
+};
+
+static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
+				    struct arm_smmu_cd *cdptr,
+				    const struct arm_smmu_cd *target)
+{
+	struct arm_smmu_cd_writer cd_writer = {
+		.writer = {
+			.ops = &arm_smmu_cd_writer_ops,
+			.master = master,
+		},
+		.ssid = ssid,
+	};
+
+	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
+}
+
 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 			    struct arm_smmu_ctx_desc *cd)
 {
@@ -1473,16 +1534,20 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 	u64 val;
 	bool cd_live;
-	struct arm_smmu_cd *cdptr;
+	struct arm_smmu_cd target;
+	struct arm_smmu_cd *cdptr = &target;
+	struct arm_smmu_cd *cd_table_entry;
 	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
 
 	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
 		return -E2BIG;
 
-	cdptr = arm_smmu_get_cd_ptr(master, ssid);
-	if (!cdptr)
+	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
+	if (!cd_table_entry)
 		return -ENOMEM;
 
+	target = *cd_table_entry;
 	val = le64_to_cpu(cdptr->data[0]);
 	cd_live = !!(val & CTXDESC_CD_0_V);
@@ -1511,13 +1576,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 		if (!(smmu->features & ARM_SMMU_FEAT_HA))
 			tcr &= ~CTXDESC_CD_0_TCR_HA;
 
-	/*
-	 * STE may be live, and the SMMU might read dwords of this CD in any
-	 * order. Ensure that it observes valid values before reading
-	 * V=1.
-	 */
-	arm_smmu_sync_cd(master, ssid, true);
-
 	val = tcr |
 #ifdef __BIG_ENDIAN
 		CTXDESC_CD_0_ENDI |
@@ -1531,18 +1589,8 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 		if (cd_table->stall_enabled)
 			val |= CTXDESC_CD_0_S;
 	}
-
-	/*
-	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
-	 * "Configuration structures and configuration invalidation completion"
-	 *
-	 *   The size of single-copy atomic reads made by the SMMU is
-	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
-	 *   field within an aligned 64-bit span of a structure can be altered
-	 *   without first making the structure invalid.
-	 */
-	WRITE_ONCE(cdptr->data[0], cpu_to_le64(val));
-	arm_smmu_sync_cd(master, ssid, true);
+	cdptr->data[0] = cpu_to_le64(val);
+	arm_smmu_write_cd_entry(master, ssid, cd_table_entry, &target);
 
 	return 0;
 }
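For illustration only (not part of this patch): once the rest of the series makes CD generation precise, a caller is expected to look roughly like the sketch below, where arm_smmu_make_s1_cd() stands in for the target-CD generation that later patches introduce:

          struct arm_smmu_cd target = {};
          struct arm_smmu_cd *cdptr;

          cdptr = arm_smmu_get_cd_ptr(master, ssid);
          if (!cdptr)
                  return -ENOMEM;

          /* Generate the complete target CD from scratch... */
          arm_smmu_make_s1_cd(&target, master, smmu_domain);
          /* ...and let the shared writer sequence the update and syncs */
          arm_smmu_write_cd_entry(master, ssid, cdptr, &target);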