v2 --> v3: 1. Discard the patches(support gather) that affects the kabi. These patches are not critical to performance optimization.
---> openeuler_defconfig, 11 SSD
non-strict 1313.4 (discard gather patches) 1314.7 1326.6 (all, contain gather patches) 1363.6
strict (after: all) 1357.3 1355.8 1311.1
strict (after: discard) 1248.7 1188.7 1218.5
strict (before) 716.7 728.9 721.6
v1 --> v2: 1. Fix compile error: drivers/uacce/dummy_drv/dummy_wd_v2.c:202:11: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .unmap = dummy_wd2_iommu_unmap, 2. Add a new fix: 862c3715de8f ("iommu: Switch gather->end to the inclusive end") 3. Leave member iotlb_range_add in "struct iommu_ops" as a placeholder
v1: For more information, please refer: http://openeuler.huawei.com/bugzilla/show_bug.cgi?id=21306
Rob Herring (1): iommu: Allow io-pgtable to be used outside of drivers/iommu/
Tom Murphy (1): iommu: Fix flush_tlb_all typo
Will Deacon (9): iommu/io-pgtable: Replace IO_PGTABLE_QUIRK_NO_DMA with specific flag iommu/arm-smmu-v3: Increase maximum size of queues iommu/io-pgtable-arm: Remove redundant call to io_pgtable_tlb_sync() iommu/io-pgtable: Rename iommu_gather_ops to iommu_flush_ops iommu/arm-smmu-v3: Separate s/w and h/w views of prod and cons indexes iommu/arm-smmu-v3: Drop unused 'q' argument from Q_OVF macro iommu/arm-smmu-v3: Move low-level queue fields out of arm_smmu_queue iommu/arm-smmu-v3: Operate directly on low-level queue where possible iommu/arm-smmu-v3: Reduce contention during command-queue insertion
tom (1): iommu: Change tlb_range_add to iotlb_range_add and tlb_sync to iotlb_sync
drivers/iommu/arm-smmu-v3.c | 853 +++++++++++++----- drivers/iommu/arm-smmu.c | 14 +- drivers/iommu/io-pgtable-arm-v7s.c | 16 +- drivers/iommu/io-pgtable-arm.c | 24 +- drivers/iommu/io-pgtable.c | 5 +- drivers/iommu/iommu-pasid-table.h | 2 +- drivers/iommu/ipmmu-vmsa.c | 8 +- drivers/iommu/msm_iommu.c | 6 +- drivers/iommu/mtk_iommu.c | 4 +- drivers/iommu/mtk_iommu.h | 3 +- drivers/iommu/qcom_iommu.c | 6 +- {drivers/iommu => include/linux}/io-pgtable.h | 17 +- include/linux/iommu.h | 6 +- 13 files changed, 685 insertions(+), 279 deletions(-) rename {drivers/iommu => include/linux}/io-pgtable.h (93%)
From: tom murphyt7@tcd.ie
mainline inclusion from mainline-v5.0-rc1 commit 51eb78098ab79bba8b1df24da2304e61deb74629 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
Someone forgot to update this comment.
Signed-off-by: Tom Murphy murphyt7@tcd.ie Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c5cc3c9e393c..bf6e7f736a4a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -269,8 +269,8 @@ struct iommu_sva_param { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain - * @tlb_range_add: Add a given iova range to the flush queue for this domain - * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * @iotlb_range_add: Add a given iova range to the flush queue for this domain + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping
From: Tom Murphy murphyt7@tcd.ie
mainline inclusion from mainline-v5.1-rc1 commit db04d4a3d72f0c5ee34609559f535d11ab47303c category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
Fix typo, flush_tlb_all should be flush_iotlb_all.
Signed-off-by: Tom Murphy murphyt7@tcd.ie Signed-off-by: Joerg Roedel jroedel@suse.de Conflicts: include/linux/iommu.h
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bf6e7f736a4a..d44f3a6762be 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -268,7 +268,7 @@ struct iommu_sva_param { * @mm_invalidate: Invalidate a range of mappings for an mm * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain - * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain + * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_range_add: Add a given iova range to the flush queue for this domain * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue
From: Rob Herring robh@kernel.org
mainline inclusion from mainline-v5.1-rc1 commit b77cf11f094136a9d7d0ee6a56cf49db1f412871 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
Move io-pgtable.h to include/linux/ and export alloc_io_pgtable_ops and free_io_pgtable_ops. This enables drivers outside drivers/iommu/ to use the page table library. Specifically, some ARM Mali GPUs use the ARM page table formats.
Cc: Will Deacon will.deacon@arm.com Cc: Robin Murphy robin.murphy@arm.com Cc: Joerg Roedel joro@8bytes.org Cc: Matthias Brugger matthias.bgg@gmail.com Cc: Rob Clark robdclark@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: iommu@lists.linux-foundation.org Cc: linux-mediatek@lists.infradead.org Cc: linux-arm-msm@vger.kernel.org Signed-off-by: Rob Herring robh@kernel.org Signed-off-by: Joerg Roedel jroedel@suse.de Conflicts: drivers/iommu/arm-smmu-v3.c drivers/iommu/io-pgtable-arm.c drivers/iommu/iommu-pasid-table.h
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/io-pgtable-arm-v7s.c | 3 +-- drivers/iommu/io-pgtable-arm.c | 2 +- drivers/iommu/io-pgtable.c | 5 +++-- drivers/iommu/iommu-pasid-table.h | 2 +- drivers/iommu/ipmmu-vmsa.c | 3 +-- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/mtk_iommu.h | 3 +-- drivers/iommu/qcom_iommu.c | 2 +- {drivers/iommu => include/linux}/io-pgtable.h | 0 11 files changed, 12 insertions(+), 14 deletions(-) rename {drivers/iommu => include/linux}/io-pgtable.h (100%)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 940b0138a22c..0f35f5618443 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -30,6 +30,7 @@ #include <linux/dma-iommu.h> #include <linux/err.h> #include <linux/interrupt.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/mmu_context.h> @@ -46,7 +47,6 @@ #include <linux/irq.h> #include <linux/amba/bus.h>
-#include "io-pgtable.h" #include "iommu-pasid-table.h"
/* MMIO registers */ diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5bca4dc638c5..54c7c2c39aed 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -39,6 +39,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/module.h> @@ -53,7 +54,6 @@
#include <linux/amba/bus.h>
-#include "io-pgtable.h" #include "arm-smmu-regs.h"
/* diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 18a8330e1882..9a8a8870e267 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -35,6 +35,7 @@ #include <linux/atomic.h> #include <linux/dma-mapping.h> #include <linux/gfp.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/kmemleak.h> @@ -45,8 +46,6 @@
#include <asm/barrier.h>
-#include "io-pgtable.h" - /* Struct accessors */ #define io_pgtable_to_data(x) \ container_of((x), struct arm_v7s_io_pgtable, iop) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e19d1cf40c09..81edeb047859 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -22,6 +22,7 @@
#include <linux/atomic.h> #include <linux/bitops.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/sizes.h> @@ -31,7 +32,6 @@
#include <asm/barrier.h>
-#include "io-pgtable.h" #include "io-pgtable-arm.h"
#define ARM_LPAE_MAX_ADDR_BITS 52 diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 127558d83667..93f2880be6c6 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -19,11 +19,10 @@ */
#include <linux/bug.h> +#include <linux/io-pgtable.h> #include <linux/kernel.h> #include <linux/types.h>
-#include "io-pgtable.h" - static const struct io_pgtable_init_fns * io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE @@ -61,6 +60,7 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
return &iop->ops; } +EXPORT_SYMBOL_GPL(alloc_io_pgtable_ops);
/* * It is the IOMMU driver's responsibility to ensure that the page table @@ -77,3 +77,4 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops) io_pgtable_tlb_flush_all(iop); io_pgtable_init_table[iop->fmt]->free(iop); } +EXPORT_SYMBOL_GPL(free_io_pgtable_ops); diff --git a/drivers/iommu/iommu-pasid-table.h b/drivers/iommu/iommu-pasid-table.h index a7243579a4cb..1401e0038318 100644 --- a/drivers/iommu/iommu-pasid-table.h +++ b/drivers/iommu/iommu-pasid-table.h @@ -9,7 +9,7 @@
#include <linux/bug.h> #include <linux/types.h> -#include "io-pgtable.h" +#include "linux/io-pgtable.h"
struct mm_struct;
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d8598e44e381..afeeb7c9753b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -16,6 +16,7 @@ #include <linux/export.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/module.h> #include <linux/of.h> @@ -37,8 +38,6 @@ #define arm_iommu_detach_device(...) do {} while (0) #endif
-#include "io-pgtable.h" - #define IPMMU_CTX_MAX 8
struct ipmmu_features { diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index fc5f0b53adaf..8419217493bc 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -21,6 +21,7 @@ #include <linux/platform_device.h> #include <linux/errno.h> #include <linux/io.h> +#include <linux/io-pgtable.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/spinlock.h> @@ -35,7 +36,6 @@
#include "msm_iommu_hw-8xxx.h" #include "msm_iommu.h" -#include "io-pgtable.h"
#define MRC(reg, processor, op1, crn, crm, op2) \ __asm__ __volatile__ ( \ diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 778498b8633f..62c2c3e8c5df 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -19,13 +19,12 @@ #include <linux/component.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/list.h> #include <linux/spinlock.h> #include <soc/mediatek/smi.h>
-#include "io-pgtable.h" - struct mtk_iommu_suspend_reg { u32 standard_axi_mode; u32 dcm_dis; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index ee70e9921cf1..296dbcef856e 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/kconfig.h> @@ -42,7 +43,6 @@ #include <linux/slab.h> #include <linux/spinlock.h>
-#include "io-pgtable.h" #include "arm-smmu-regs.h"
#define SMMU_INTR_SEL_NS 0x2000 diff --git a/drivers/iommu/io-pgtable.h b/include/linux/io-pgtable.h similarity index 100% rename from drivers/iommu/io-pgtable.h rename to include/linux/io-pgtable.h
From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.3-rc1 commit 4f41845b340783eaec9cc2840fe3cb9a00574054 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
IO_PGTABLE_QUIRK_NO_DMA is a bit of a misnomer, since it's really just an indication of whether or not the page-table walker for the IOMMU is coherent with the CPU caches. Since cache coherency is more than just a quirk, replace the flag with its own field in the io_pgtable_cfg structure.
Cc: Bjorn Andersson bjorn.andersson@linaro.org Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/arm-smmu-v3.c | 4 +--- drivers/iommu/arm-smmu.c | 4 +--- drivers/iommu/io-pgtable-arm-v7s.c | 10 +++++----- drivers/iommu/io-pgtable-arm.c | 19 ++++++++----------- drivers/iommu/ipmmu-vmsa.c | 1 + include/linux/io-pgtable.h | 11 ++++------- 6 files changed, 20 insertions(+), 29 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 0f35f5618443..fd0c0fa991c3 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2039,13 +2039,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, .tlb = &arm_smmu_gather_ops, .iommu_dev = smmu->dev, };
- if (smmu->features & ARM_SMMU_FEAT_COHERENCY) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 54c7c2c39aed..c61369e5430f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -878,13 +878,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, };
- if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 9a8a8870e267..8454de93e356 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -215,7 +215,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; } - if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (table && !cfg->coherent_walk) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -249,7 +249,7 @@ static void __arm_v7s_free_table(void *table, int lvl, struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl);
- if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -261,7 +261,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) + if (cfg->coherent_walk) return;
dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -727,7 +727,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_4GB | - IO_PGTABLE_QUIRK_NO_DMA | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL;
@@ -846,7 +845,8 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, + .quirks = IO_PGTABLE_QUIRK_ARM_NS, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 81edeb047859..c6f8d749a566 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -196,7 +196,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, return NULL;
pages = page_address(p); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -222,7 +222,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages((unsigned long)pages, get_order(size)); @@ -240,7 +240,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, { *ptep = pte;
- if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) __arm_lpae_sync_pte(ptep, cfg); }
@@ -316,8 +316,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
old = cmpxchg64_relaxed(ptep, curr, new);
- if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || - (old & ARM_LPAE_PTE_SW_SYNC)) + if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC)) return old;
/* Even if it's not ours, there's no point waiting; just kick it */ @@ -358,8 +357,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, pte = arm_lpae_install_table(cptep, ptep, 0, cfg); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); - } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && - !(pte & ARM_LPAE_PTE_SW_SYNC)) { + } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { __arm_lpae_sync_pte(ptep, cfg); }
@@ -732,7 +730,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data;
- if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL;
@@ -825,8 +823,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data;
/* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | - IO_PGTABLE_QUIRK_NON_STRICT)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) return NULL;
data = arm_lpae_alloc_pgtable(cfg); @@ -1130,7 +1127,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, - .quirks = IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, };
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index afeeb7c9753b..17fa1a739b82 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -431,6 +431,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * TODO: Add support for coherent walk through CCI with DVM and remove * cache handling. For now, delegate it to the io-pgtable code. */ + domain->cfg.coherent_walk = false; domain->cfg.iommu_dev = domain->mmu->root->dev;
/* diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 47d5ae559329..39a1e115aa67 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -43,6 +43,8 @@ struct iommu_gather_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. + * @coherent_walk A flag to indicate whether or not page table walks made + * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the * page table walker. @@ -67,11 +69,6 @@ struct io_pgtable_cfg { * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). * - * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever - * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a - * software-emulated IOMMU), such that pagetable updates need not - * be treated as explicit DMA data. - * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for * delayed invalidation. @@ -80,12 +77,12 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) - #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) - #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5) + #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; unsigned int oas; + bool coherent_walk; const struct iommu_gather_ops *tlb; struct device *iommu_dev;
From: Will Deacon will.deacon@arm.com
mainline inclusion from mainline-v5.3-rc1 commit d25f6ead162eab3f51b6616be23691ac42e141b5 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
We've been artificially limiting the size of our queues to 4k so that we don't end up allocating huge amounts of physically-contiguous memory at probe time. However, 4k is only enough for 256 commands in the command queue, so instead let's try to allocate the largest queue that the SMMU supports, retrying with a smaller size if the allocation fails.
The caveat here is that we have to limit our upper bound based on CONFIG_CMA_ALIGNMENT to ensure that our queue allocations remain natually aligned, which is required by the SMMU architecture.
Signed-off-by: Will Deacon will.deacon@arm.com Conflicts: drivers/iommu/arm-smmu-v3.c
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/arm-smmu-v3.c | 54 ++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 16 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index fd0c0fa991c3..1d27e7cd2bed 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -208,6 +208,7 @@ #define Q_BASE_RWA (1UL << 62) #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5) #define Q_BASE_LOG2SIZE GENMASK(4, 0) +#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
/* * Stream table. @@ -272,8 +273,9 @@ #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
/* Command queue */ -#define CMDQ_ENT_DWORDS 2 -#define CMDQ_MAX_SZ_SHIFT 8 +#define CMDQ_ENT_SZ_SHIFT 4 +#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3) +#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
#define CMDQ_CONS_ERR GENMASK(30, 24) #define CMDQ_ERR_CERROR_NONE_IDX 0 @@ -318,8 +320,9 @@ #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
/* Event queue */ -#define EVTQ_ENT_DWORDS 4 -#define EVTQ_MAX_SZ_SHIFT 7 +#define EVTQ_ENT_SZ_SHIFT 5 +#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) +#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
#define EVTQ_0_ID GENMASK_ULL(7, 0) #define EVT_ID_TRANSLATION_FAULT 0x10 @@ -342,8 +345,9 @@ #define EVTQ_3_IPA GENMASK_ULL(51, 12)
/* PRI queue */ -#define PRIQ_ENT_DWORDS 2 -#define PRIQ_MAX_SZ_SHIFT 8 +#define PRIQ_ENT_SZ_SHIFT 4 +#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) +#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
#define PRIQ_0_SID GENMASK_ULL(31, 0) #define PRIQ_0_SSID GENMASK_ULL(51, 32) @@ -888,7 +892,7 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent) /* High-level queue accessors */ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) { - memset(cmd, 0, CMDQ_ENT_DWORDS << 3); + memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT); cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
switch (ent->opcode) { @@ -2736,17 +2740,32 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, unsigned long prod_off, unsigned long cons_off, - size_t dwords) + size_t dwords, const char *name) { - size_t qsz = ((1 << q->max_n_shift) * dwords) << 3; + size_t qsz; + + do { + qsz = ((1 << q->max_n_shift) * dwords) << 3; + q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, + GFP_KERNEL); + if (q->base || qsz < PAGE_SIZE) + break; + + q->max_n_shift--; + } while (1);
- q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); if (!q->base) { - dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n", - qsz); + dev_err(smmu->dev, + "failed to allocate queue (0x%zx bytes) for %s\n", + qsz, name); return -ENOMEM; }
+ if (!WARN_ON(q->base_dma & (qsz - 1))) { + dev_info(smmu->dev, "allocated %u entries for %s\n", + 1 << q->max_n_shift, name); + } + q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); q->ent_dwords = dwords; @@ -2770,13 +2789,15 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) /* cmdq */ spin_lock_init(&smmu->cmdq.lock); ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, - ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS); + ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS, + "cmdq"); if (ret) return ret;
/* evtq */ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, - ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS); + ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS, + "evtq"); if (ret) return ret;
@@ -2785,7 +2806,8 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) return 0;
return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); + ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS, + "priq"); }
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) @@ -3532,7 +3554,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return -ENXIO; }
- /* Queue sizes, capped at 4k */ + /* Queue sizes, capped to ensure natural alignment */ smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, reg)); if (!smmu->cmdq.q.max_n_shift) {
From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.4-rc1 commit f71da46719460acd5afa411e52dc8cdf1cb9b0ce category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
Commit b6b65ca20bc9 ("iommu/io-pgtable-arm: Add support for non-strict mode") added an unconditional call to io_pgtable_tlb_sync() immediately after the case where we replace a block entry with a table entry during an unmap() call. This is redundant, since the IOMMU API will call iommu_tlb_sync() on this path and the patch in question mentions this:
| To save having to reason about it too much, make sure the invalidation | in arm_lpae_split_blk_unmap() just performs its own unconditional sync | to minimise the window in which we're technically violating the break- | before-make requirement on a live mapping. This might work out redundant | with an outer-level sync for strict unmaps, but we'll never be splitting | blocks on a DMA fastpath anyway.
However, this sync gets in the way of deferred TLB invalidation for leaf entries and is at best a questionable, unproven hack. Remove it.
Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/io-pgtable-arm-v7s.c | 1 - drivers/iommu/io-pgtable-arm.c | 1 - 2 files changed, 2 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8454de93e356..3a43fa3982d5 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -598,7 +598,6 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, }
io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - io_pgtable_tlb_sync(&data->iop); return size; }
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index c6f8d749a566..a726ba694ee3 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -527,7 +527,6 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, tablep = iopte_deref(pte, data); } else if (unmap_idx >= 0) { io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - io_pgtable_tlb_sync(&data->iop); return size; }
From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.4-rc1 commit 298f78895b081911e0b3605f07d79ebd3d4cf7b0 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
In preparation for TLB flush gathering in the IOMMU API, rename the iommu_gather_ops structure in io-pgtable to iommu_flush_ops, which better describes its purpose and avoids the potential for confusion between different levels of the API.
$ find linux/ -type f -name '*.[ch]' | xargs sed -i 's/gather_ops/flush_ops/g'
Signed-off-by: Will Deacon will@kernel.org Conflicts: drivers/gpu/drm/panfrost/panfrost_mmu.c not exist
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/arm-smmu-v3.c | 4 ++-- drivers/iommu/arm-smmu.c | 8 ++++---- drivers/iommu/io-pgtable-arm-v7s.c | 2 +- drivers/iommu/io-pgtable-arm.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 4 ++-- drivers/iommu/msm_iommu.c | 4 ++-- drivers/iommu/mtk_iommu.c | 4 ++-- drivers/iommu/qcom_iommu.c | 4 ++-- include/linux/io-pgtable.h | 6 +++--- 9 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 1d27e7cd2bed..b27460b7b709 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1769,7 +1769,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); }
-static const struct iommu_gather_ops arm_smmu_gather_ops = { +static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync, @@ -2044,7 +2044,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, .ias = ias, .oas = oas, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, - .tlb = &arm_smmu_gather_ops, + .tlb = &arm_smmu_flush_ops, .iommu_dev = smmu->dev, };
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index c61369e5430f..e2a72c71e20d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -253,7 +253,7 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - const struct iommu_gather_ops *tlb_ops; + const struct iommu_flush_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; bool non_strict; @@ -535,19 +535,19 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); }
-static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = { +static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context_s1, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync_context, };
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = { +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync_context, };
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = { +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync, .tlb_sync = arm_smmu_tlb_sync_vmid, diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 3a43fa3982d5..f1f781498c37 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -825,7 +825,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); }
-static const struct iommu_gather_ops dummy_tlb_ops = { +static const struct iommu_flush_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index a726ba694ee3..efd6e994678f 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -986,7 +986,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); }
-static const struct iommu_gather_ops dummy_tlb_ops __initconst = { +static const struct iommu_flush_ops dummy_tlb_ops __initconst = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 17fa1a739b82..8e1601e05fed 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -360,7 +360,7 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, /* The hardware doesn't support selective TLB flush. */ }
-static const struct iommu_gather_ops ipmmu_gather_ops = { +static const struct iommu_flush_ops ipmmu_flush_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, @@ -424,7 +424,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; domain->cfg.ias = 32; domain->cfg.oas = 40; - domain->cfg.tlb = &ipmmu_gather_ops; + domain->cfg.tlb = &ipmmu_flush_ops; domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32); domain->io_domain.geometry.force_aperture = true; /* diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 8419217493bc..15bd2a35472f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -189,7 +189,7 @@ static void __flush_iotlb_sync(void *cookie) */ }
-static const struct iommu_gather_ops msm_iommu_gather_ops = { +static const struct iommu_flush_ops msm_iommu_flush_ops = { .tlb_flush_all = __flush_iotlb, .tlb_add_flush = __flush_iotlb_range, .tlb_sync = __flush_iotlb_sync, @@ -356,7 +356,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv) .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, - .tlb = &msm_iommu_gather_ops, + .tlb = &msm_iommu_flush_ops, .iommu_dev = priv->dev, };
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f9f69f7111a9..998aa6f3c768 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -196,7 +196,7 @@ static void mtk_iommu_tlb_sync(void *cookie) } }
-static const struct iommu_gather_ops mtk_iommu_gather_ops = { +static const struct iommu_flush_ops mtk_iommu_flush_ops = { .tlb_flush_all = mtk_iommu_tlb_flush_all, .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync, .tlb_sync = mtk_iommu_tlb_sync, @@ -275,7 +275,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, - .tlb = &mtk_iommu_gather_ops, + .tlb = &mtk_iommu_flush_ops, .iommu_dev = data->dev, };
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 296dbcef856e..bd44dbaae895 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -175,7 +175,7 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size, } }
-static const struct iommu_gather_ops qcom_gather_ops = { +static const struct iommu_flush_ops qcom_flush_ops = { .tlb_flush_all = qcom_iommu_tlb_inv_context, .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, .tlb_sync = qcom_iommu_tlb_sync, @@ -226,7 +226,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 40, - .tlb = &qcom_gather_ops, + .tlb = &qcom_flush_ops, .iommu_dev = qcom_iommu->dev, };
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 39a1e115aa67..989cc89800b5 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,7 +16,7 @@ enum io_pgtable_fmt { };
/** - * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management. + * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management. * * @tlb_flush_all: Synchronously invalidate the entire TLB context. * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. @@ -27,7 +27,7 @@ enum io_pgtable_fmt { * Note that these can all be called in atomic context and must therefore * not block. */ -struct iommu_gather_ops { +struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie); @@ -83,7 +83,7 @@ struct io_pgtable_cfg { unsigned int ias; unsigned int oas; bool coherent_walk; - const struct iommu_gather_ops *tlb; + const struct iommu_flush_ops *tlb; struct device *iommu_dev;
/* Low-level data specific to the table format */
From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.4-rc1 commit 2a8868f16e6b1987cf43f1f46d2a12b7b6ddcd88 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
In preparation for rewriting the command queue insertion code to use a new algorithm, separate the software and hardware views of the prod and cons indexes so that manipulating the software state doesn't automatically update the hardware state at the same time.
No functional change.
Tested-by: Ganapatrao Kulkarni gkulkarni@marvell.com Signed-off-by: Will Deacon will@kernel.org Conflicts: drivers/iommu/arm-smmu-v3.c
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/arm-smmu-v3.c | 38 ++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index b27460b7b709..48e97d6eee3f 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -781,17 +781,13 @@ static bool queue_empty(struct arm_smmu_queue *q) Q_WRP(q, q->prod) == Q_WRP(q, q->cons); }
-static void queue_sync_cons(struct arm_smmu_queue *q) +static void queue_sync_cons_in(struct arm_smmu_queue *q) { q->cons = readl_relaxed(q->cons_reg); }
-static void queue_inc_cons(struct arm_smmu_queue *q) +static void queue_sync_cons_out(struct arm_smmu_queue *q) { - u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; - - q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); - /* * Ensure that all CPU accesses (reads and writes) to the queue * are complete before we update the cons pointer. @@ -800,7 +796,13 @@ static void queue_inc_cons(struct arm_smmu_queue *q) writel_relaxed(q->cons, q->cons_reg); }
-static int queue_sync_prod(struct arm_smmu_queue *q) +static void queue_inc_cons(struct arm_smmu_queue *q) +{ + u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; + q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); +} + +static int queue_sync_prod_in(struct arm_smmu_queue *q) { int ret = 0; u32 prod = readl_relaxed(q->prod_reg); @@ -812,12 +814,15 @@ static int queue_sync_prod(struct arm_smmu_queue *q) return ret; }
+static void queue_sync_prod_out(struct arm_smmu_queue *q) +{ + writel(q->prod, q->prod_reg); +} + static void queue_inc_prod(struct arm_smmu_queue *q) { u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1; - q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod); - writel(q->prod, q->prod_reg); }
/* @@ -834,7 +839,7 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) ARM_SMMU_CMDQ_SYNC_TIMEOUT_US : ARM_SMMU_POLL_TIMEOUT_US);
- while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) { + while (queue_sync_cons_in(q), (sync ? !queue_empty(q) : queue_full(q))) { if (ktime_compare(ktime_get(), timeout) > 0) return -ETIMEDOUT;
@@ -868,6 +873,7 @@ static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords); queue_inc_prod(q); + queue_sync_prod_out(q); return 0; }
@@ -886,6 +892,7 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords); queue_inc_cons(q); + queue_sync_cons_out(q); return 0; }
@@ -1498,7 +1505,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) * Not much we can do on overflow, so scream and pretend we're * trying harder. */ - if (queue_sync_prod(q) == -EOVERFLOW) + if (queue_sync_prod_in(q) == -EOVERFLOW) dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n"); } while (!queue_empty(q));
@@ -1571,7 +1578,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } }
- if (queue_sync_prod(q) == -EOVERFLOW) + if (queue_sync_prod_in(q) == -EOVERFLOW) dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n"); } while (!queue_empty(q));
@@ -1601,7 +1608,7 @@ static int arm_smmu_flush_queue(struct arm_smmu_device *smmu, u64 batch;
spin_lock(&q->wq.lock); - if (queue_sync_prod(q) == -EOVERFLOW) + if (queue_sync_prod_in(q) == -EOVERFLOW) dev_err(smmu->dev, "%s overflow detected -- requests lost\n", name);
@@ -1733,8 +1740,9 @@ static void arm_smmu_tlb_inv_context(void *cookie) /* * NOTE: when io-pgtable is in non-strict mode, we may get here with * PTEs previously cleared by unmaps on the current CPU not yet visible - * to the SMMU. We are relying on the DSB implicit in queue_inc_prod() - * to guarantee those are observed before the TLBI. Do be careful, 007. + * to the SMMU. We are relying on the DSB implicit in + * queue_sync_prod_out() to guarantee those are observed before the + * TLBI. Do be careful, 007. */ arm_smmu_cmdq_issue_cmd(smmu, &cmd); __arm_smmu_tlb_sync(smmu);
From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.4-rc1 commit 8a073da07bac169601a1874606e09bdb62811978 category: bugfix bugzilla: 21306 CVE: NA
-------------------------------------------------------------------------
The Q_OVF macro doesn't need to access the arm_smmu_queue structure, so drop the unused macro argument.
No functional change.
Tested-by: Ganapatrao Kulkarni gkulkarni@marvell.com Signed-off-by: Will Deacon will@kernel.org Conflicts: drivers/iommu/arm-smmu-v3.c
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/iommu/arm-smmu-v3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 48e97d6eee3f..a9b154e1b5d5 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -201,7 +201,7 @@ #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1)) #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift)) #define Q_OVERFLOW_FLAG (1 << 31) -#define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG) +#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG) #define Q_ENT(q, p) ((q)->base + \ Q_IDX(q, p) * (q)->ent_dwords)
@@ -799,7 +799,7 @@ static void queue_sync_cons_out(struct arm_smmu_queue *q) static void queue_inc_cons(struct arm_smmu_queue *q) { u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; - q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); + q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); }
static int queue_sync_prod_in(struct arm_smmu_queue *q) @@ -807,7 +807,7 @@ static int queue_sync_prod_in(struct arm_smmu_queue *q) int ret = 0; u32 prod = readl_relaxed(q->prod_reg);
- if (Q_OVF(q, prod) != Q_OVF(q, q->prod)) + if (Q_OVF(prod) != Q_OVF(q->prod)) ret = -EOVERFLOW;
q->prod = prod; @@ -822,7 +822,7 @@ static void queue_sync_prod_out(struct arm_smmu_queue *q) static void queue_inc_prod(struct arm_smmu_queue *q) { u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1; - q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod); + q->prod = Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod); }
/* @@ -1510,7 +1510,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) } while (!queue_empty(q));
/* Sync our overflow flag, as we believe we're up to speed */ - q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons); + q->cons = Q_OVF(q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
q->batch++; wake_up_all_locked(&q->wq); @@ -1583,7 +1583,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } while (!queue_empty(q));
/* Sync our overflow flag, as we believe we're up to speed */ - q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons); + q->cons = Q_OVF(q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons); writel(q->cons, q->cons_reg);
q->batch++;