SMMU HTTU sync from 5.10
Jean-Philippe Brucker (1):
  iommu/arm-smmu-v3: Add support for Hardware Translation Table Update

Keqian Zhu (3):
  iommu: Introduce dirty log tracking framework
  vfio/iommu_type1: Fix the logic of updating num_non_hwdbm_domains
  iommu: Stop tracking the dirty log status of iommu_domain

Kunkun Jiang (18):
  iommu/io-pgtable-arm: Add quirk ARM_HD and ARM_BBMLx
  iommu/io-pgtable-arm: Fix attach device failed when smmuv3 supports HTTU
  iommu/io-pgtable-arm: Add and realize split_block ops
  iommu/io-pgtable-arm: Add and realize merge_page ops
  iommu/io-pgtable-arm: Add and realize sync_dirty_log ops
  iommu/io-pgtable-arm: Add and realize clear_dirty_log ops
  iommu/arm-smmu-v3: Add feature detection for BBML
  iommu/arm-smmu-v3: Realize switch_dirty_log iommu ops
  iommu/arm-smmu-v3: Realize sync_dirty_log iommu ops
  iommu/arm-smmu-v3: Realize clear_dirty_log iommu ops
  iommu/arm-smmu-v3: Realize support_dirty_log iommu ops
  vfio/iommu_type1: Add HWDBM status maintenance
  vfio/iommu_type1: Optimize dirty bitmap population based on iommu HWDBM
  vfio/iommu_type1: Add support for manual dirty log clear
  vfio/iommu_type1: replace kvmalloc with kvzalloc and kfree with kvfree
  iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping
  iommu: Fix compliation failure caused by iommu_device_register
  vfio/iommu: Fix uncorrect type parameters which used in bitmap operations
jiaqingtong (9):
  fix conflit with 41e1eb2546e9c8200d32f11e4b47d86d156a5a97 ("iommu/io-pgtable-arm: Prepare PTE methods for handling multiple entries")
  fix conflit with 41e1eb2546e9c8200d32f11e4b47d86d156a5a97 ("iommu/io-pgtable-arm: Prepare PTE methods for handling multiple entries")
  previes commits delete __arm_lpae_set_pte, add it back. Fixes: 1fe27be5ffec ("iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()")
  fix d86d9f910d53, change iommu_pgsize's para
  previes commits delete __arm_lpae_set_pte, add it back. Fixes: 1fe27be5ffec ("iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()")
  vfio_domain delete port member. Fixes: 71cfafda9c9b ("vfio: Move the Intel no-snoop control off of IOMMU_CACHE")
  iommu: Introduce dirty log tracking framework: conflit with 9abe2ac834851a7d0b0756e295cf7a292c45ca53, change arm_lpae_install_table to accept arm_lpae_iopte
  previes commits delete __arm_lpae_set_pte, add it back. Fixes: 1fe27be5ffec ("iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()")
  conflit with 9a630a4b, need move to iommu_domain_ops; conflit with ec288a2c, change func
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c |   2 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 268 +++++++++++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  17 +-
 drivers/iommu/io-pgtable-arm.c              | 411 +++++++++++++++++-
 drivers/iommu/iommu.c                       | 188 +++++++-
 drivers/vfio/vfio_iommu_type1.c             | 314 ++++++++++++-
 include/linux/io-pgtable.h                  |  23 +
 include/linux/iommu.h                       |  67 +++
 include/trace/events/iommu.h                |  63 +++
 include/uapi/linux/vfio.h                   |  36 +-
 10 files changed, 1365 insertions(+), 24 deletions(-)
base-commit: f898c7c8c095242ad10bfff5c1f8c52c64488f96
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
These features are essential to support dirty log tracking for SMMU with io-pgtable mapping.
The dirty state information is encoded using the access permission bits AP[2] (stage 1) or S2AP[1] (stage 2) in conjunction with the DBM (Dirty Bit Modifier) bit, where DBM means writable and AP[2]/S2AP[1] means dirty.

When the ARM_HD quirk is present, we set the DBM bit for stage-1 mappings. As SMMU nested mode is not upstreamed for now, we only aim to support dirty log tracking for stage 1 with io-pgtable mapping (i.e. SVA is not supported).
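To make the encoding concrete, here is a minimal illustrative check (not part of this patch; the helper name is made up, the bit definitions are the ones used by this series): a stage-1 leaf PTE is treated as dirty once hardware has cleared AP[2], and the ARM_HD quirk arms this by setting DBM on writable mappings.

  /* Illustrative sketch only: with HTTU HD, DBM marks the entry as
   * hardware-writable and a cleared AP[2] (AP_RDONLY) means it was written. */
  static bool arm_lpae_s1_pte_dirty(arm_lpae_iopte pte)
  {
          return (pte & ARM_LPAE_PTE_DBM) && !(pte & ARM_LPAE_PTE_AP_RDONLY);
  }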
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 8 +++++++-
 include/linux/io-pgtable.h     | 11 +++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 72dcdd468cf3..5c4033ada65a 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -75,6 +75,7 @@
#define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) @@ -84,7 +85,7 @@
#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) /* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << 51) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ @@ -400,6 +401,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, int prot) { + struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte pte;
if (data->iop.fmt == ARM_64_LPAE_S1 || @@ -407,6 +409,10 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, pte = ARM_LPAE_PTE_nG; if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= ARM_LPAE_PTE_AP_RDONLY; + else if (data->iop.fmt == ARM_64_LPAE_S1 && + cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) + pte |= ARM_LPAE_PTE_DBM; + if (!(prot & IOMMU_PRIV)) pte |= ARM_LPAE_PTE_AP_UNPRIV; } else { diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1b7a44b35616..84b39e820d0e 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -85,6 +85,14 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. + * + * IO_PGTABLE_QUIRK_ARM_HD: Support hardware management of dirty status. + * + * IO_PGTABLE_QUIRK_ARM_BBML1: ARM SMMU supports BBM Level 1 behavior + * when changing block size. + * + * IO_PGTABLE_QUIRK_ARM_BBML2: ARM SMMU supports BBM Level 2 behavior + * when changing block size. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -92,6 +100,9 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) + #define IO_PGTABLE_QUIRK_ARM_BBML1 BIT(8) + #define IO_PGTABLE_QUIRK_ARM_BBML2 BIT(9) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias;
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4V39D
CVE: NA
------------------------------
We add IO_PGTABLE_QUIRK_ARM_HD to pgtbl_cfg->quirks when SMMUv3 supports HTTU. However, this flag is not accepted by arm_64_lpae_alloc_pgtable_s1(), so attaching a device fails. The same applies to the two similar flags IO_PGTABLE_QUIRK_ARM_BBML1 and IO_PGTABLE_QUIRK_ARM_BBML2. This patch allows all three quirks in the stage-1 and stage-2 page table allocators.
Fixes: 341497bb2613 ("iommu/io-pgtable-arm: Add quirk ARM_HD and ARM_BBMLx")
Reported-by: Junxin Chen <chenjunxin1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 5c4033ada65a..825ac51a5559 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -810,7 +810,10 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_ARM_TTBR1 | - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | + IO_PGTABLE_QUIRK_ARM_HD | + IO_PGTABLE_QUIRK_ARM_BBML1 | + IO_PGTABLE_QUIRK_ARM_BBML2)) return NULL;
data = arm_lpae_alloc_pgtable(cfg); @@ -912,7 +915,9 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
/* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_HD | + IO_PGTABLE_QUIRK_ARM_BBML1 | + IO_PGTABLE_QUIRK_ARM_BBML2)) return NULL;
data = arm_lpae_alloc_pgtable(cfg);
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
Block (hugepage) mapping is not a proper granule for dirty log tracking. To take an extreme example: if DMA writes one byte, a 1G block mapping reports 1G as dirty, whereas a 4K mapping reports only 4K.

This splits a block descriptor into a span of page descriptors. The BBML1 or BBML2 feature is required.

Block splitting is designed to be used only by dirty log tracking, which does not run concurrently with other pgtable ops that access the underlying page table, so no race condition exists.
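As a caller-side sketch (the real caller arrives in the later arm-smmu-v3 switch_dirty_log patch; the variable names here are illustrative), starting dirty log would split every block mapping in the range down to page granularity through the new op:

  /* Sketch: split [iova, iova + size) before enabling dirty tracking. */
  size_t handled = ops->split_block(ops, iova, size);

  if (handled != size)
          pr_warn("split_block handled 0x%zx of 0x%zx bytes, dirty granule stays coarse\n",
                  handled, size);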
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 122 +++++++++++++++++++++++++++++++++
 include/linux/io-pgtable.h     |   2 +
 2 files changed, 124 insertions(+)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 825ac51a5559..89a73ef4b41e 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -82,6 +82,8 @@ #define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8) #define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5) #define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) +/* Block descriptor bits */ +#define ARM_LPAE_PTE_NT (((arm_lpae_iopte)1) << 16)
#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) /* Ignore the contiguous bit for block splitting */ @@ -717,6 +719,125 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return iopte_to_paddr(pte, data) | iova; }
+static size_t __arm_lpae_split_block(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep); + +static size_t arm_lpae_do_split_blk(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, + arm_lpae_iopte blk_pte, int lvl, + arm_lpae_iopte *ptep) +{ + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte, *tablep; + phys_addr_t blk_paddr; + size_t tablesz = ARM_LPAE_GRANULE(data); + size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + int i; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; + + tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); + if (!tablep) + return 0; + + blk_paddr = iopte_to_paddr(blk_pte, data); + pte = iopte_prot(blk_pte); + for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) + __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_BBML1) { + /* Race does not exist */ + blk_pte |= ARM_LPAE_PTE_NT; + __arm_lpae_set_pte(ptep, blk_pte, cfg); + io_pgtable_tlb_flush_walk(&data->iop, iova, size, size); + } + /* Race does not exist */ + pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + + /* Have splited it into page? */ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) + return size; + + /* Go back to lvl - 1 */ + ptep -= ARM_LPAE_LVL_IDX(iova, lvl - 1, data); + return __arm_lpae_split_block(data, iova, size, lvl - 1, ptep); +} + +static size_t __arm_lpae_split_block(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte; + struct io_pgtable *iop = &data->iop; + size_t base, next_size, total_size; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); + if (WARN_ON(!pte)) + return 0; + + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { + if (iopte_leaf(pte, lvl, iop->fmt)) { + if (lvl == (ARM_LPAE_MAX_LEVELS - 1) || + (pte & ARM_LPAE_PTE_AP_RDONLY)) + return size; + + /* We find a writable block, split it. 
*/ + return arm_lpae_do_split_blk(data, iova, size, pte, + lvl + 1, ptep); + } else { + /* If it is the last table level, then nothing to do */ + if (lvl == (ARM_LPAE_MAX_LEVELS - 2)) + return size; + + total_size = 0; + next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data); + ptep = iopte_deref(pte, data); + for (base = 0; base < size; base += next_size) + total_size += __arm_lpae_split_block(data, + iova + base, next_size, lvl + 1, + ptep); + return total_size; + } + } else if (iopte_leaf(pte, lvl, iop->fmt)) { + WARN(1, "Can't split behind a block.\n"); + return 0; + } + + /* Keep on walkin */ + ptep = iopte_deref(pte, data); + return __arm_lpae_split_block(data, iova, size, lvl + 1, ptep); +} + +static size_t arm_lpae_split_block(struct io_pgtable_ops *ops, + unsigned long iova, size_t size) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + long iaext = (s64)iova >> cfg->ias; + + if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) + return 0; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) + return 0; + + /* If it is smallest granule, then nothing to do */ + if (size == ARM_LPAE_BLOCK_SIZE(ARM_LPAE_MAX_LEVELS - 1, data)) + return size; + + return __arm_lpae_split_block(data, iova, size, lvl, ptep); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -795,6 +916,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .map_pages = arm_lpae_map_pages, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, + .split_block = arm_lpae_split_block, };
return data; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 84b39e820d0e..388f0a281de4 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -177,6 +177,8 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + size_t (*split_block)(struct io_pgtable_ops *ops, unsigned long iova, + size_t size); };
/**
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 89a73ef4b41e..d220642b2645 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -745,7 +745,7 @@ static size_t arm_lpae_do_split_blk(struct arm_lpae_io_pgtable *data, blk_paddr = iopte_to_paddr(blk_pte, data); pte = iopte_prot(blk_pte); for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) - __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); + __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_BBML1) { /* Race does not exist */
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
If block (hugepage) mappings were split when dirty log was started, we need to recover them when dirty log is stopped, for better DMA performance.

This recovers the block mappings and unmaps the span of page mappings. The BBML1 or BBML2 feature is required.

Page merging is designed to be used only by dirty log tracking, which does not run concurrently with other pgtable ops that access the underlying page table, so no race condition exists.
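A caller-side sketch (the real caller is added in the later switch_dirty_log patch; cont_size and prot are illustrative): once a physically contiguous run has been found via iova_to_phys, it is handed to the new op to re-install a block mapping:

  /* Sketch: try to replace a contiguous span of pages with one block. */
  size_t handled = ops->merge_page(ops, iova, paddr, cont_size, prot);

  if (handled != cont_size)
          pr_warn("merge_page handled 0x%zx of 0x%zx bytes\n", handled, cont_size);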
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 78 ++++++++++++++++++++++++++++++++++
 include/linux/io-pgtable.h     |  2 +
 2 files changed, 80 insertions(+)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index d220642b2645..8b4ce1a6b511 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -838,6 +838,83 @@ static size_t arm_lpae_split_block(struct io_pgtable_ops *ops, return __arm_lpae_split_block(data, iova, size, lvl, ptep); }
+static size_t __arm_lpae_merge_page(struct arm_lpae_io_pgtable *data, + unsigned long iova, phys_addr_t paddr, + size_t size, int lvl, arm_lpae_iopte *ptep, + arm_lpae_iopte prot) +{ + arm_lpae_iopte pte, *tablep; + struct io_pgtable *iop = &data->iop; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); + if (WARN_ON(!pte)) + return 0; + + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { + if (iopte_leaf(pte, lvl, iop->fmt)) + return size; + + /* Race does not exist */ + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_BBML1) { + prot |= ARM_LPAE_PTE_NT; + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + io_pgtable_tlb_flush_walk(iop, iova, size, + ARM_LPAE_GRANULE(data)); + + prot &= ~(ARM_LPAE_PTE_NT); + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + } else { + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + } + + tablep = iopte_deref(pte, data); + __arm_lpae_free_pgtable(data, lvl + 1, tablep); + return size; + } else if (iopte_leaf(pte, lvl, iop->fmt)) { + /* The size is too small, already merged */ + return size; + } + + /* Keep on walkin */ + ptep = iopte_deref(pte, data); + return __arm_lpae_merge_page(data, iova, paddr, size, lvl + 1, ptep, prot); +} + +static size_t arm_lpae_merge_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int iommu_prot) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + arm_lpae_iopte prot; + long iaext = (s64)iova >> cfg->ias; + + if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) + return 0; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext || paddr >> cfg->oas)) + return 0; + + /* If no access, then nothing to do */ + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return size; + + /* If it is smallest granule, then nothing to do */ + if (size == ARM_LPAE_BLOCK_SIZE(ARM_LPAE_MAX_LEVELS - 1, data)) + return size; + + prot = arm_lpae_prot_to_pte(data, iommu_prot); + return __arm_lpae_merge_page(data, iova, paddr, size, lvl, ptep, prot); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -917,6 +994,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, .split_block = arm_lpae_split_block, + .merge_page = arm_lpae_merge_page, };
return data; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 388f0a281de4..9f8c80cda0ed 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -179,6 +179,8 @@ struct io_pgtable_ops { unsigned long iova); size_t (*split_block)(struct io_pgtable_ops *ops, unsigned long iova, size_t size); + size_t (*merge_page)(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t phys, size_t size, int prot); };
/**
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 8b4ce1a6b511..2967d9f836a6 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -862,14 +862,14 @@ static size_t __arm_lpae_merge_page(struct arm_lpae_io_pgtable *data, /* Race does not exist */ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_BBML1) { prot |= ARM_LPAE_PTE_NT; - __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + __arm_lpae_init_pte(data, paddr, prot, lvl, 1, ptep); io_pgtable_tlb_flush_walk(iop, iova, size, ARM_LPAE_GRANULE(data));
prot &= ~(ARM_LPAE_PTE_NT); - __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + __arm_lpae_init_pte(data, paddr, prot, lvl, 1, ptep); } else { - __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + __arm_lpae_init_pte(data, paddr, prot, lvl, 1, ptep); }
tablep = iopte_deref(pte, data);
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
During dirty log tracking, the user retrieves the dirty log from the IOMMU if it supports hardware dirty log. This scans the leaf TTDs and treats a TTD as dirty if it is writable. As we only set the DBM bit for stage-1 mappings, we check whether AP[2] is clear.
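The bitmap indexing is a plain offset/length translation; a short worked sketch of what happens for one writable leaf TTD (mirroring the helper added below): a 2MB block with a 4K bitmap granule sets 512 consecutive bits.

  /* Sketch: reflect one writable leaf covering 'size' bytes at 'iova' into
   * the user bitmap, which is indexed relative to 'base_iova'. */
  nbits  = size >> bitmap_pgshift;              /* e.g. 2MB / 4K = 512 bits */
  offset = (iova - base_iova) >> bitmap_pgshift;
  bitmap_set(bitmap, offset, nbits);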
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 89 ++++++++++++++++++++++++++++++++++
 include/linux/io-pgtable.h     |  4 ++
 2 files changed, 93 insertions(+)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2967d9f836a6..95a99c01794c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -915,6 +915,94 @@ static size_t arm_lpae_merge_page(struct io_pgtable_ops *ops, unsigned long iova return __arm_lpae_merge_page(data, iova, paddr, size, lvl, ptep, prot); }
+static int __arm_lpae_sync_dirty_log(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, + int lvl, arm_lpae_iopte *ptep, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + arm_lpae_iopte pte; + struct io_pgtable *iop = &data->iop; + size_t base, next_size; + unsigned long offset; + int nbits, ret; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); + if (WARN_ON(!pte)) + return -EINVAL; + + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { + if (iopte_leaf(pte, lvl, iop->fmt)) { + if (pte & ARM_LPAE_PTE_AP_RDONLY) + return 0; + + /* It is writable, set the bitmap */ + nbits = size >> bitmap_pgshift; + offset = (iova - base_iova) >> bitmap_pgshift; + bitmap_set(bitmap, offset, nbits); + return 0; + } + /* Current level is table, traverse next level */ + next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data); + ptep = iopte_deref(pte, data); + for (base = 0; base < size; base += next_size) { + ret = __arm_lpae_sync_dirty_log(data, iova + base, + next_size, lvl + 1, ptep, bitmap, + base_iova, bitmap_pgshift); + if (ret) + return ret; + } + return 0; + } else if (iopte_leaf(pte, lvl, iop->fmt)) { + if (pte & ARM_LPAE_PTE_AP_RDONLY) + return 0; + + /* Though the size is too small, also set bitmap */ + nbits = size >> bitmap_pgshift; + offset = (iova - base_iova) >> bitmap_pgshift; + bitmap_set(bitmap, offset, nbits); + return 0; + } + + /* Keep on walkin */ + ptep = iopte_deref(pte, data); + return __arm_lpae_sync_dirty_log(data, iova, size, lvl + 1, ptep, + bitmap, base_iova, bitmap_pgshift); +} + +static int arm_lpae_sync_dirty_log(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + long iaext = (s64)iova >> cfg->ias; + + if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) + return -EINVAL; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) + return -EINVAL; + + if (data->iop.fmt != ARM_64_LPAE_S1 && + data->iop.fmt != ARM_32_LPAE_S1) + return -EINVAL; + + return __arm_lpae_sync_dirty_log(data, iova, size, lvl, ptep, + bitmap, base_iova, bitmap_pgshift); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -995,6 +1083,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .iova_to_phys = arm_lpae_iova_to_phys, .split_block = arm_lpae_split_block, .merge_page = arm_lpae_merge_page, + .sync_dirty_log = arm_lpae_sync_dirty_log, };
return data; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 9f8c80cda0ed..72eee0080e17 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -181,6 +181,10 @@ struct io_pgtable_ops { size_t size); size_t (*merge_page)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t phys, size_t size, int prot); + int (*sync_dirty_log)(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift); };
/**
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
After the dirty log is retrieved, the user should clear it to re-enable dirty log tracking for the dirtied pages. This clears the dirty state of the leaf TTDs specified by the user-provided bitmap (as we only set the DBM bit for stage-1 mappings, clearing means setting the AP[2] bit again).
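Clearing is deliberately conservative; a sketch of the per-leaf logic added below: a leaf TTD is made read-only again (AP[2] set) only if every bit it covers is set in the user bitmap, otherwise a partially dirty block stays writable and keeps being reported as dirty.

  /* Sketch: re-arm dirty tracking only for a fully covered leaf entry. */
  for (i = offset; i < offset + nbits; i++)
          if (!test_bit(i, bitmap))
                  return 0;       /* partially dirty: leave it writable */

  pte |= ARM_LPAE_PTE_AP_RDONLY;
  __arm_lpae_set_pte(ptep, pte, &iop->cfg);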
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 93 ++++++++++++++++++++++++++++++++++
 include/linux/io-pgtable.h     |  4 ++
 2 files changed, 97 insertions(+)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 95a99c01794c..6ad083bd34c3 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1003,6 +1003,98 @@ static int arm_lpae_sync_dirty_log(struct io_pgtable_ops *ops, bitmap, base_iova, bitmap_pgshift); }
+static int __arm_lpae_clear_dirty_log(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, + int lvl, arm_lpae_iopte *ptep, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + arm_lpae_iopte pte; + struct io_pgtable *iop = &data->iop; + unsigned long offset; + size_t base, next_size; + int nbits, ret, i; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); + if (WARN_ON(!pte)) + return -EINVAL; + + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { + if (iopte_leaf(pte, lvl, iop->fmt)) { + if (pte & ARM_LPAE_PTE_AP_RDONLY) + return 0; + + /* Ensure all corresponding bits are set */ + nbits = size >> bitmap_pgshift; + offset = (iova - base_iova) >> bitmap_pgshift; + for (i = offset; i < offset + nbits; i++) { + if (!test_bit(i, bitmap)) + return 0; + } + + /* Race does not exist */ + pte |= ARM_LPAE_PTE_AP_RDONLY; + __arm_lpae_set_pte(ptep, pte, &iop->cfg); + return 0; + } + /* Current level is table, traverse next level */ + next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data); + ptep = iopte_deref(pte, data); + for (base = 0; base < size; base += next_size) { + ret = __arm_lpae_clear_dirty_log(data, iova + base, + next_size, lvl + 1, ptep, bitmap, + base_iova, bitmap_pgshift); + if (ret) + return ret; + } + return 0; + } else if (iopte_leaf(pte, lvl, iop->fmt)) { + /* Though the size is too small, it is already clean */ + if (pte & ARM_LPAE_PTE_AP_RDONLY) + return 0; + + return -EINVAL; + } + + /* Keep on walkin */ + ptep = iopte_deref(pte, data); + return __arm_lpae_clear_dirty_log(data, iova, size, lvl + 1, ptep, + bitmap, base_iova, bitmap_pgshift); +} + +static int arm_lpae_clear_dirty_log(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + long iaext = (s64)iova >> cfg->ias; + + if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) + return -EINVAL; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) + return -EINVAL; + + if (data->iop.fmt != ARM_64_LPAE_S1 && + data->iop.fmt != ARM_32_LPAE_S1) + return -EINVAL; + + return __arm_lpae_clear_dirty_log(data, iova, size, lvl, ptep, + bitmap, base_iova, bitmap_pgshift); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -1084,6 +1176,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .split_block = arm_lpae_split_block, .merge_page = arm_lpae_merge_page, .sync_dirty_log = arm_lpae_sync_dirty_log, + .clear_dirty_log = arm_lpae_clear_dirty_log, };
return data; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 72eee0080e17..6f5efc6ffbd1 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -185,6 +185,10 @@ struct io_pgtable_ops { unsigned long iova, size_t size, unsigned long *bitmap, unsigned long base_iova, unsigned long bitmap_pgshift); + int (*clear_dirty_log)(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift); };
/**
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
This detects the BBML feature and, if the SMMU supports it, transfers the corresponding BBMLx quirk to io-pgtable.
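The level matters for the split/merge paths added earlier; a sketch of how the quirk is consumed there (this fragment mirrors arm_lpae_do_split_blk from this series): with BBML1 an intermediate step with the nT bit set plus a TLB flush is required before the entry is replaced, while BBML2 allows replacing the entry directly.

  /* Sketch: BBML1 needs an intermediate nT step before replacing the entry. */
  if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_BBML1) {
          blk_pte |= ARM_LPAE_PTE_NT;
          __arm_lpae_set_pte(ptep, blk_pte, cfg);
          io_pgtable_tlb_flush_walk(&data->iop, iova, size, size);
  }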
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  6 ++++++
 2 files changed, 25 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d0422bb13724..4bad986cdd76 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2229,6 +2229,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, .iommu_dev = smmu->dev, };
+ if (smmu->features & ARM_SMMU_FEAT_BBML1) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_BBML1; + else if (smmu->features & ARM_SMMU_FEAT_BBML2) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_BBML2; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -3681,6 +3686,20 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
/* IDR3 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); + switch (FIELD_GET(IDR3_BBML, reg)) { + case IDR3_BBML0: + break; + case IDR3_BBML1: + smmu->features |= ARM_SMMU_FEAT_BBML1; + break; + case IDR3_BBML2: + smmu->features |= ARM_SMMU_FEAT_BBML2; + break; + default: + dev_err(smmu->dev, "unknown/unsupported BBM behavior level\n"); + return -ENXIO; + } + if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 269b6fa705d4..44771da77816 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -51,6 +51,10 @@ #define IDR1_SIDSIZE GENMASK(5, 0)
#define ARM_SMMU_IDR3 0xc +#define IDR3_BBML GENMASK(12, 11) +#define IDR3_BBML0 0 +#define IDR3_BBML1 1 +#define IDR3_BBML2 2 #define IDR3_RIL (1 << 10)
#define ARM_SMMU_IDR5 0x14 @@ -646,6 +650,8 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_SVA (1 << 17) #define ARM_SMMU_FEAT_E2H (1 << 18) #define ARM_SMMU_FEAT_NESTING (1 << 19) +#define ARM_SMMU_FEAT_BBML1 (1 << 20) +#define ARM_SMMU_FEAT_BBML2 (1 << 21) u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/io-pgtable-arm.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6ad083bd34c3..404c87422338 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -244,13 +244,19 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, sizeof(*ptep) * num_entries, DMA_TO_DEVICE); }
-static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg) +static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, + struct io_pgtable_cfg *cfg) { - - *ptep = 0; + *ptep = pte;
if (!cfg->coherent_walk) - __arm_lpae_sync_pte(ptep, 1, cfg); + __arm_lpae_sync_pte(ptep, cfg); +} + +static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg) +{ + + __arm_lpae_set_pte(ptep, 0, cfg); }
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
This realizes switch_dirty_log. In order to get a finer dirty granule, it invokes arm_smmu_split_block() when dirty log starts, and invokes arm_smmu_merge_page() to recover block mappings when dirty log stops.
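Together with the sync/clear ops realized in the following patches, the expected sequence at the iommu API level is roughly the one below (iommu_switch_dirty_log, iommu_sync_dirty_log and iommu_clear_dirty_log are the wrappers introduced by the dirty log tracking framework patch, as used by the VFIO patches later in this series; sketch only):

  /* Sketch of one tracking cycle over a mapped range. */
  iommu_switch_dirty_log(domain, true, iova, size, prot);   /* split blocks */

  /* DMA runs; hardware clears AP[2] on written pages */

  iommu_sync_dirty_log(domain, iova, size, bitmap, iova, PAGE_SHIFT);
  iommu_clear_dirty_log(domain, iova, size, bitmap, iova, PAGE_SHIFT);

  iommu_switch_dirty_log(domain, false, iova, size, prot);  /* merge back */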
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 142 ++++++++++++++++++++
 drivers/iommu/iommu.c                       |   5 +-
 include/linux/iommu.h                       |   2 +
 3 files changed, 147 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4bad986cdd76..2c2726a53b9f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2780,6 +2780,147 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain) return ret; }
+static int arm_smmu_split_block(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + size_t handled_size; + + if (!(smmu->features & (ARM_SMMU_FEAT_BBML1 | ARM_SMMU_FEAT_BBML2))) { + dev_err(smmu->dev, "don't support BBML1/2, can't split block\n"); + return -ENODEV; + } + if (!ops || !ops->split_block) { + pr_err("io-pgtable don't realize split block\n"); + return -ENODEV; + } + + handled_size = ops->split_block(ops, iova, size); + if (handled_size != size) { + pr_err("split block failed\n"); + return -EFAULT; + } + + return 0; +} + +static int __arm_smmu_merge_page(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + size_t handled_size; + + if (!ops || !ops->merge_page) { + pr_err("io-pgtable don't realize merge page\n"); + return -ENODEV; + } + + while (size) { + size_t pgsize = iommu_pgsize(domain, iova | paddr, size); + + handled_size = ops->merge_page(ops, iova, paddr, pgsize, prot); + if (handled_size != pgsize) { + pr_err("merge page failed\n"); + return -EFAULT; + } + + pr_debug("merge handled: iova 0x%lx pa %pa size 0x%zx\n", + iova, &paddr, pgsize); + + iova += pgsize; + paddr += pgsize; + size -= pgsize; + } + + return 0; +} + +static int arm_smmu_merge_page(struct iommu_domain *domain, unsigned long iova, + size_t size, int prot) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + phys_addr_t phys; + dma_addr_t p, i; + size_t cont_size; + int ret = 0; + + if (!(smmu->features & (ARM_SMMU_FEAT_BBML1 | ARM_SMMU_FEAT_BBML2))) { + dev_err(smmu->dev, "don't support BBML1/2, can't merge page\n"); + return -ENODEV; + } + + if (!ops || !ops->iova_to_phys) + return -ENODEV; + + while (size) { + phys = ops->iova_to_phys(ops, iova); + cont_size = PAGE_SIZE; + p = phys + cont_size; + i = iova + cont_size; + + while (cont_size < size && p == ops->iova_to_phys(ops, i)) { + p += PAGE_SIZE; + i += PAGE_SIZE; + cont_size += PAGE_SIZE; + } + + if (cont_size != PAGE_SIZE) { + ret = __arm_smmu_merge_page(domain, iova, phys, + cont_size, prot); + if (ret) + break; + } + + iova += cont_size; + size -= cont_size; + } + + return ret; +} + +static int arm_smmu_switch_dirty_log(struct iommu_domain *domain, bool enable, + unsigned long iova, size_t size, int prot) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; + + if (!(smmu->features & ARM_SMMU_FEAT_HD)) + return -ENODEV; + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -EINVAL; + + if (enable) { + /* + * For SMMU, the hardware dirty management is always enabled if + * hardware supports HTTU HD. The action to start dirty log is + * spliting block mapping. + * + * We don't return error even if the split operation fail, as we + * can still track dirty at block granule, which is still a much + * better choice compared to full dirty policy. + */ + arm_smmu_split_block(domain, iova, size); + } else { + /* + * For SMMU, the hardware dirty management is always enabled if + * hardware supports HTTU HD. The action to stop dirty log is + * merging page mapping. 
+ * + * We don't return error even if the merge operation fail, as it + * just effects performace of DMA transaction. + */ + arm_smmu_merge_page(domain, iova, size, prot); + } + + return 0; +} + static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) { return iommu_fwspec_add_ids(dev, args->args, 1); @@ -2890,6 +3031,7 @@ static struct iommu_ops arm_smmu_ops = { .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, + .switch_dirty_log = arm_smmu_switch_dirty_log, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .remove_dev_pasid = arm_smmu_remove_dev_pasid, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3a67e636287a..7b2dc57f738d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2364,8 +2364,8 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) } EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
-static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, size_t *count) +size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, size_t *count) { unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; @@ -2417,6 +2417,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, *count = size >> pgsize_idx; return pgsize; } +EXPORT_SYMBOL_GPL(iommu_pgsize);
static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0225cf7445de..c484ceda2c70 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -475,6 +475,8 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); +extern size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, size_t *count); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2c2726a53b9f..8785b37c78c7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2820,7 +2820,7 @@ static int __arm_smmu_merge_page(struct iommu_domain *domain, }
while (size) { - size_t pgsize = iommu_pgsize(domain, iova | paddr, size); + size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
handled_size = ops->merge_page(ops, iova, paddr, pgsize, prot); if (handled_size != pgsize) {
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
This realizes the sync_dirty_log iommu op based on the sync_dirty_log io-pgtable op.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 30 +++++++++++++++++++++
 1 file changed, 30 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8785b37c78c7..819b12dca934 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2921,6 +2921,35 @@ static int arm_smmu_switch_dirty_log(struct iommu_domain *domain, bool enable, return 0; }
+static int arm_smmu_sync_dirty_log(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + if (!(smmu->features & ARM_SMMU_FEAT_HD)) + return -ENODEV; + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -EINVAL; + + if (!ops || !ops->sync_dirty_log) { + pr_err("io-pgtable don't realize sync dirty log\n"); + return -ENODEV; + } + + /* + * Flush iotlb to ensure all inflight transactions are completed. + * See doc IHI0070Da 3.13.4 "HTTU behavior summary". + */ + arm_smmu_flush_iotlb_all(domain); + return ops->sync_dirty_log(ops, iova, size, bitmap, base_iova, + bitmap_pgshift); +} + static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) { return iommu_fwspec_add_ids(dev, args->args, 1); @@ -3032,6 +3061,7 @@ static struct iommu_ops arm_smmu_ops = { .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, .switch_dirty_log = arm_smmu_switch_dirty_log, + .sync_dirty_log = arm_smmu_sync_dirty_log, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .remove_dev_pasid = arm_smmu_remove_dev_pasid,
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
This realizes the clear_dirty_log iommu op based on the clear_dirty_log io-pgtable op.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 25 +++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 819b12dca934..16a7d0ffb268 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2950,6 +2950,30 @@ static int arm_smmu_sync_dirty_log(struct iommu_domain *domain, bitmap_pgshift); }
+static int arm_smmu_clear_dirty_log(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + if (!(smmu->features & ARM_SMMU_FEAT_HD)) + return -ENODEV; + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -EINVAL; + + if (!ops || !ops->clear_dirty_log) { + pr_err("io-pgtable don't realize clear dirty log\n"); + return -ENODEV; + } + + return ops->clear_dirty_log(ops, iova, size, bitmap, base_iova, + bitmap_pgshift); +} + static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) { return iommu_fwspec_add_ids(dev, args->args, 1); @@ -3062,6 +3086,7 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .switch_dirty_log = arm_smmu_switch_dirty_log, .sync_dirty_log = arm_smmu_sync_dirty_log, + .clear_dirty_log = arm_smmu_clear_dirty_log, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .remove_dev_pasid = arm_smmu_remove_dev_pasid,
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
We have implemented the interfaces required to support IOMMU dirty log tracking. The last step is to report this capability to the upper-layer user, so that the user can build higher-level policy on it. For ARM SMMUv3, support is equivalent to having the ARM_SMMU_FEAT_HD feature.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 16a7d0ffb268..4dc6fab6ff5a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2884,6 +2884,13 @@ static int arm_smmu_merge_page(struct iommu_domain *domain, unsigned long iova, return ret; }
+static bool arm_smmu_support_dirty_log(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + return !!(smmu_domain->smmu->features & ARM_SMMU_FEAT_HD); +} + static int arm_smmu_switch_dirty_log(struct iommu_domain *domain, bool enable, unsigned long iova, size_t size, int prot) { @@ -3084,6 +3091,7 @@ static struct iommu_ops arm_smmu_ops = { .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, + .support_dirty_log = arm_smmu_support_dirty_log, .switch_dirty_log = arm_smmu_switch_dirty_log, .sync_dirty_log = arm_smmu_sync_dirty_log, .clear_dirty_log = arm_smmu_clear_dirty_log,
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
We are going to optimize dirty log tracking based on the IOMMU dirty log, but the dirty log from the IOMMU is useful only when all iommu-backed domains support it.

This maintains a counter in vfio_iommu, which is used for dirty bitmap population in the next patch.

This also maintains a boolean flag in vfio_domain, which is used by the dirty log switching policy in the next patch.
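A sketch of how the next patch consumes these (the names match the code that follows): the IOMMU dirty log path is taken only when no domain lacks HWDBM, and only HWDBM-capable domains take part in dirty log switching.

  /* Sketch: gate the per-dma fast path on the vfio_iommu counter. */
  bool use_iommu_log = dma->iommu_mapped && !iommu->num_non_hwdbm_domains;

  /* Sketch: only HWDBM-capable domains get dirty log switched. */
  list_for_each_entry(d, &iommu->domain_list, next) {
          if (!d->iommu_hwdbm)
                  continue;
          /* switch/sync/clear dirty log on d->domain */
  }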
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/vfio/vfio_iommu_type1.c | 25 +++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index eacd6ec04de5..398dce33b530 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -71,6 +71,7 @@ struct vfio_iommu { unsigned int vaddr_invalid_count; uint64_t pgsize_bitmap; uint64_t num_non_pinned_groups; + uint64_t num_non_hwdbm_domains; bool v2; bool nesting; bool dirty_page_tracking; @@ -83,6 +84,7 @@ struct vfio_domain { struct list_head group_list; bool fgsp : 1; /* Fine-grained super pages */ bool enforce_cache_coherency : 1; + bool iommu_hwdbm; /* Hardware dirty management */ };
struct vfio_dma { @@ -2149,6 +2151,26 @@ static int vfio_iommu_domain_alloc(struct device *dev, void *data) return 1; /* Don't iterate */ }
+/* + * Called after a new group is added to the iommu_domain, or an old group is + * removed from the iommu_domain. Update the HWDBM status of vfio_domain and + * vfio_iommu. + */ +static void vfio_iommu_update_hwdbm(struct vfio_iommu *iommu, + struct vfio_domain *domain, + bool attach) +{ + bool old_hwdbm = domain->iommu_hwdbm; + bool new_hwdbm = iommu_support_dirty_log(domain->domain); + + if (old_hwdbm && !new_hwdbm && attach) { + iommu->num_non_hwdbm_domains++; + } else if (!old_hwdbm && new_hwdbm && !attach) { + iommu->num_non_hwdbm_domains--; + } + domain->iommu_hwdbm = new_hwdbm; +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group, enum vfio_group_type type) { @@ -2291,6 +2313,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (!iommu_attach_group(d->domain, group->iommu_group)) { list_add(&group->next, &d->group_list); + vfio_iommu_update_hwdbm(iommu, d, true); iommu_domain_free(domain->domain); kfree(domain); goto done; @@ -2318,6 +2341,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&domain->next, &iommu->domain_list); vfio_update_pgsize_bitmap(iommu); + vfio_iommu_update_hwdbm(iommu, domain, true); done: /* Delete the old one and insert new iova list */ vfio_iommu_iova_insert_copy(iommu, &iova_copy); @@ -2496,6 +2520,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, continue;
iommu_detach_group(domain->domain, group->iommu_group); + vfio_iommu_update_hwdbm(iommu, domain, false); update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group);
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK
CVE: NA
------------------------------
Previously, if the vfio_iommu was not of pinned_page_dirty_scope and the vfio_dma was iommu_mapped, we populated a full dirty bitmap for that vfio_dma. Now we can try to get the dirty log from the IOMMU before falling back to that pessimistic decision.
The new dirty bitmap population policy:
In detail: if all vfio_groups are of pinned_page_dirty_scope, dirty bitmap population is unaffected. If some vfio_groups are not of pinned_page_dirty_scope and all domains support HWDBM, we try to get the dirty log from the IOMMU. Otherwise, we fall back to a full dirty bitmap.
Consider DMA and group hotplug:
Start dirty log for a newly added DMA range, and stop dirty log for a DMA range that is about to be removed. A domain may not support HWDBM at start but gain support after group hotplug (the first group with HWDBM is attached, or all groups without HWDBM are detached); conversely, a domain may support HWDBM at start and lose it later (a group without HWDBM is attached). So our policy is to switch dirty log for domains dynamically, as sketched below.
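A sketch of the resulting switch condition around group attach/detach (matching vfio_iommu_update_hwdbm in the diff below): dirty log is toggled only when the domain's effective tracking state actually changes.

  /* Sketch: old vs. desired tracking state for this domain. */
  log_enabled   = old_hwdbm && !(attach && singular);   /* was tracking  */
  should_enable = new_hwdbm && !(!attach && singular);  /* should track  */

  if (should_enable && !log_enabled)
          vfio_domain_dirty_log_switch(iommu, domain, true);
  else if (!should_enable && log_enabled)
          vfio_domain_dirty_log_switch(iommu, domain, false);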
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: jiaqingtong <jiaqingtong@huawei.com>
---
 drivers/vfio/vfio_iommu_type1.c | 180 ++++++++++++++++++++++++++++++--
 1 file changed, 172 insertions(+), 8 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 398dce33b530..9edc28f4a2d1 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1157,6 +1157,46 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) } }
+static int vfio_iommu_dirty_log_clear(struct vfio_iommu *iommu, + dma_addr_t start_iova, size_t size, + unsigned long *bitmap_buffer, + dma_addr_t base_iova, + unsigned long pgshift) +{ + struct vfio_domain *d; + int ret = 0; + + list_for_each_entry(d, &iommu->domain_list, next) { + ret = iommu_clear_dirty_log(d->domain, start_iova, size, + bitmap_buffer, base_iova, pgshift); + if (ret) { + pr_warn("vfio_iommu dirty log clear failed!\n"); + break; + } + } + + return ret; +} + +static int vfio_iommu_dirty_log_sync(struct vfio_iommu *iommu, + struct vfio_dma *dma, + unsigned long pgshift) +{ + struct vfio_domain *d; + int ret = 0; + + list_for_each_entry(d, &iommu->domain_list, next) { + ret = iommu_sync_dirty_log(d->domain, dma->iova, dma->size, + dma->bitmap, dma->iova, pgshift); + if (ret) { + pr_warn("vfio_iommu dirty log sync failed!\n"); + break; + } + } + + return ret; +} + static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, struct vfio_dma *dma, dma_addr_t base_iova, size_t pgsize) @@ -1167,13 +1207,24 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, unsigned long copy_offset = bit_offset / BITS_PER_LONG; unsigned long shift = bit_offset % BITS_PER_LONG; unsigned long leftover; + bool iommu_hwdbm_dirty = false; + int ret;
- /* - * mark all pages dirty if any IOMMU capable device is not able - * to report dirty pages and all pages are pinned and mapped. - */ - if (iommu->num_non_pinned_groups && dma->iommu_mapped) + if (!iommu->num_non_pinned_groups || !dma->iommu_mapped) { + /* nothing to do */ + } else if (!iommu->num_non_hwdbm_domains) { + /* try to get dirty log from IOMMU */ + iommu_hwdbm_dirty = true; + ret = vfio_iommu_dirty_log_sync(iommu, dma, pgshift); + if (ret) + return ret; + } else { + /* + * mark all pages dirty if any IOMMU capable device is not able + * to report dirty pages and all pages are pinned and mapped. + */ bitmap_set(dma->bitmap, 0, nbits); + }
if (shift) { bitmap_shift_left(dma->bitmap, dma->bitmap, shift, @@ -1191,6 +1242,11 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, DIRTY_BITMAP_BYTES(nbits + shift))) return -EFAULT;
+ /* Recover the bitmap if it'll be used to clear hardware dirty log */ + if (shift && iommu_hwdbm_dirty) + bitmap_shift_right(dma->bitmap, dma->bitmap, shift, + nbits + shift); + return 0; }
@@ -1229,6 +1285,16 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (ret) return ret;
+ /* Clear iommu dirty log to re-enable dirty log tracking */ + if (iommu->num_non_pinned_groups && dma->iommu_mapped && + !iommu->num_non_hwdbm_domains) { + ret = vfio_iommu_dirty_log_clear(iommu, dma->iova, + dma->size, dma->bitmap, dma->iova, + pgshift); + if (ret) + return ret; + } + /* * Re-populate bitmap to include all pinned pages which are * considered as dirty but exclude pages which are unpinned and @@ -1278,6 +1344,22 @@ static void vfio_notify_dma_unmap(struct vfio_iommu *iommu, mutex_lock(&iommu->lock); }
+static void vfio_dma_dirty_log_switch(struct vfio_iommu *iommu, + struct vfio_dma *dma, bool enable) +{ + struct vfio_domain *d; + + if (!dma->iommu_mapped) + return; + + list_for_each_entry(d, &iommu->domain_list, next) { + if (!d->iommu_hwdbm) + continue; + WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova, + dma->size, d->prot | dma->prot)); + } +} + static int vfio_dma_do_unmap(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_unmap *unmap, struct vfio_bitmap *bitmap) @@ -1415,6 +1497,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, break; }
+ /* Stop log for removed dma */ + if (iommu->dirty_page_tracking) + vfio_dma_dirty_log_switch(iommu, dma, false); + unmapped += dma->size; n = rb_next(n); vfio_remove_dma(iommu, dma); @@ -1667,8 +1753,13 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (!ret && iommu->dirty_page_tracking) { ret = vfio_dma_bitmap_alloc(dma, pgsize); - if (ret) + if (ret) { vfio_remove_dma(iommu, dma); + goto out_unlock; + } + + /* Start dirty log for newly added dma */ + vfio_dma_dirty_log_switch(iommu, dma, true); }
out_unlock: @@ -2151,6 +2242,21 @@ static int vfio_iommu_domain_alloc(struct device *dev, void *data) return 1; /* Don't iterate */ }
+static void vfio_domain_dirty_log_switch(struct vfio_iommu *iommu, + struct vfio_domain *d, bool enable) +{ + struct rb_node *n; + struct vfio_dma *dma; + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + dma = rb_entry(n, struct vfio_dma, node); + if (!dma->iommu_mapped) + continue; + WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova, + dma->size, d->prot | dma->prot)); + } +} + /* * Called after a new group is added to the iommu_domain, or an old group is * removed from the iommu_domain. Update the HWDBM status of vfio_domain and @@ -2162,13 +2268,48 @@ static void vfio_iommu_update_hwdbm(struct vfio_iommu *iommu, { bool old_hwdbm = domain->iommu_hwdbm; bool new_hwdbm = iommu_support_dirty_log(domain->domain); + bool singular = list_is_singular(&domain->group_list); + bool num_non_hwdbm_zeroed = false; + bool log_enabled, should_enable;
if (old_hwdbm && !new_hwdbm && attach) { iommu->num_non_hwdbm_domains++; } else if (!old_hwdbm && new_hwdbm && !attach) { iommu->num_non_hwdbm_domains--; + if (!iommu->num_non_hwdbm_domains) + num_non_hwdbm_zeroed = true; } domain->iommu_hwdbm = new_hwdbm; + + if (!iommu->dirty_page_tracking) + return; + + /* + * When switch the dirty policy from full dirty to iommu hwdbm, we must + * populate full dirty now to avoid losing dirty. + */ + if (iommu->num_non_pinned_groups && num_non_hwdbm_zeroed) + vfio_iommu_populate_bitmap_full(iommu); + + /* + * The vfio_domain can switch dirty log tracking dynamically due to + * group attach/detach. The basic idea is to convert current dirty log + * status to desired dirty log status. + * + * If old_hwdbm is true then dirty log has been enabled. One exception + * is that this is the first group attached to a domain. + * + * If new_hwdbm is true then dirty log should be enabled. One exception + * is that this is the last group detached from a domain. + */ + log_enabled = old_hwdbm && !(attach && singular); + should_enable = new_hwdbm && !(!attach && singular); + + /* Switch dirty log tracking when status changed */ + if (should_enable && !log_enabled) + vfio_domain_dirty_log_switch(iommu, domain, true); + else if (!should_enable && log_enabled) + vfio_domain_dirty_log_switch(iommu, domain, false); }
static int vfio_iommu_type1_attach_group(void *iommu_data, @@ -2562,7 +2703,11 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, */ if (update_dirty_scope) { iommu->num_non_pinned_groups--; - if (iommu->dirty_page_tracking) + /* + * When switch the dirty policy from full dirty to pinned scope, + * we must populate full dirty now to avoid losing dirty. + */ + if (iommu->dirty_page_tracking && iommu->num_non_hwdbm_domains) vfio_iommu_populate_bitmap_full(iommu); } mutex_unlock(&iommu->lock); @@ -2915,6 +3060,22 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, -EFAULT : 0; }
+static void vfio_iommu_dirty_log_switch(struct vfio_iommu *iommu, bool enable) +{ + struct vfio_domain *d; + + /* + * We enable dirty log tracking for these vfio_domains that support + * HWDBM. Even if all iommu domains don't support HWDBM for now. They + * may support it after detach some groups. + */ + list_for_each_entry(d, &iommu->domain_list, next) { + if (!d->iommu_hwdbm) + continue; + vfio_domain_dirty_log_switch(iommu, d, enable); + } +} + static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, unsigned long arg) { @@ -2947,8 +3108,10 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, pgsize = 1 << __ffs(iommu->pgsize_bitmap); if (!iommu->dirty_page_tracking) { ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); - if (!ret) + if (!ret) { iommu->dirty_page_tracking = true; + vfio_iommu_dirty_log_switch(iommu, true); + } } mutex_unlock(&iommu->lock); return ret; @@ -2957,6 +3120,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, if (iommu->dirty_page_tracking) { iommu->dirty_page_tracking = false; vfio_dma_bitmap_free_all(iommu); + vfio_iommu_dirty_log_switch(iommu, false); } mutex_unlock(&iommu->lock); return 0;
Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 9edc28f4a2d1..74e1211d9aaf 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1356,7 +1356,7 @@ static void vfio_dma_dirty_log_switch(struct vfio_iommu *iommu, if (!d->iommu_hwdbm) continue; WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova, - dma->size, d->prot | dma->prot)); + dma->size, IOMMU_CACHE | dma->prot)); } }
@@ -2253,7 +2253,7 @@ static void vfio_domain_dirty_log_switch(struct vfio_iommu *iommu, if (!dma->iommu_mapped) continue; WARN_ON(iommu_switch_dirty_log(d->domain, enable, dma->iova, - dma->size, d->prot | dma->prot)); + dma->size, IOMMU_CACHE | dma->prot)); } }
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK CVE: NA
------------------------------
Until now, we have cleared the dirty log immediately after syncing it to userspace. This can cause redundant dirty handling if userspace handles the dirty log iteratively:
After vfio clears the dirty log, new dirty state starts to accumulate. This new dirty state will be reported to userspace even if it was generated before userspace handled the same dirty pages. In other words, we should minimize the time gap between clearing the dirty log and handling it.
This adds two user interfaces. Note that the user should clear the dirty log before handling the corresponding dirty pages, as sketched below.
1. GET_BITMAP_NOCLEAR: get the dirty log without clearing it.
2. CLEAR_BITMAP: manually clear the dirty log.
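A minimal userspace sketch of the intended flow, assuming a VFIO container fd, a 4 KiB page size and an already-mapped IOVA range; the helper name, buffer sizing and error handling are illustrative and not part of this series (the two FLAG_* values come from the vfio.h change below):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int dirty_log_round(int container_fd, uint64_t iova, uint64_t size)
{
	uint64_t pgsize = 4096, npages = size / pgsize;
	uint64_t bitmap_bytes = ((npages + 63) / 64) * sizeof(uint64_t);
	uint64_t *bitmap = calloc(1, bitmap_bytes);
	struct vfio_iommu_type1_dirty_bitmap *db;
	struct vfio_iommu_type1_dirty_bitmap_get *range;
	int ret = -1;

	db = calloc(1, sizeof(*db) + sizeof(*range));
	if (!db || !bitmap)
		goto out;

	db->argsz = sizeof(*db) + sizeof(*range);
	range = (struct vfio_iommu_type1_dirty_bitmap_get *)db->data;
	range->iova = iova;
	range->size = size;
	range->bitmap.pgsize = pgsize;
	range->bitmap.size = bitmap_bytes;
	range->bitmap.data = (__u64 *)bitmap;

	/* 1. Fetch the dirty log, leaving it set in the kernel. */
	db->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR;
	ret = ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, db);
	if (ret)
		goto out;

	/* 2. Clear exactly the pages we are about to handle... */
	db->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP;
	ret = ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, db);
	if (ret)
		goto out;

	/* 3. ...and only then copy the dirty pages reported in 'bitmap'. */
out:
	free(db);
	free(bitmap);
	return ret;
}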
Co-developed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/vfio/vfio_iommu_type1.c | 114 ++++++++++++++++++++++++++++++-- include/uapi/linux/vfio.h | 36 +++++++++- 2 files changed, 142 insertions(+), 8 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 74e1211d9aaf..fc62430a73dd 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -76,6 +76,7 @@ struct vfio_iommu { bool nesting; bool dirty_page_tracking; struct list_head emulated_iommu_groups; + bool dirty_log_get_no_clear; };
struct vfio_domain { @@ -1197,6 +1198,76 @@ static int vfio_iommu_dirty_log_sync(struct vfio_iommu *iommu, return ret; }
+static int vfio_iova_dirty_log_clear(u64 __user *bitmap, + struct vfio_iommu *iommu, + dma_addr_t iova, size_t size, + size_t pgsize) +{ + struct vfio_dma *dma; + struct rb_node *n; + dma_addr_t start_iova, end_iova, riova; + unsigned long pgshift = __ffs(pgsize); + unsigned long bitmap_size; + unsigned long *bitmap_buffer = NULL; + bool clear_valid; + int rs, re, start, end, dma_offset; + int ret = 0; + + bitmap_size = DIRTY_BITMAP_BYTES(size >> pgshift); + bitmap_buffer = kvmalloc(bitmap_size, GFP_KERNEL); + if (!bitmap_buffer) { + ret = -ENOMEM; + goto out; + } + + if (copy_from_user(bitmap_buffer, bitmap, bitmap_size)) { + ret = -EFAULT; + goto out; + } + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + dma = rb_entry(n, struct vfio_dma, node); + if ((dma->iova + dma->size - 1) < iova) + continue; + if (dma->iova > iova + size - 1) + break; + + start_iova = max(iova, dma->iova); + end_iova = min(iova + size, dma->iova + dma->size); + + /* Similar logic as the tail of vfio_iova_dirty_bitmap */ + + clear_valid = false; + start = (start_iova - iova) >> pgshift; + end = (end_iova - iova) >> pgshift; + bitmap_for_each_set_region(bitmap_buffer, rs, re, start, end) { + clear_valid = true; + riova = iova + (rs << pgshift); + dma_offset = (riova - dma->iova) >> pgshift; + bitmap_clear(dma->bitmap, dma_offset, re - rs); + } + + if (clear_valid) + vfio_dma_populate_bitmap(dma, pgsize); + + if (clear_valid && iommu->num_non_pinned_groups && + dma->iommu_mapped && !iommu->num_non_hwdbm_domains) { + ret = vfio_iommu_dirty_log_clear(iommu, start_iova, + end_iova - start_iova, bitmap_buffer, + iova, pgshift); + if (ret) { + pr_warn("dma dirty log clear failed!\n"); + goto out; + } + } + + } + +out: + kfree(bitmap_buffer); + return ret; +} + static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, struct vfio_dma *dma, dma_addr_t base_iova, size_t pgsize) @@ -1242,8 +1313,11 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, DIRTY_BITMAP_BYTES(nbits + shift))) return -EFAULT;
- /* Recover the bitmap if it'll be used to clear hardware dirty log */ - if (shift && iommu_hwdbm_dirty) + /* + * Recover the bitmap if it'll be used to clear hardware dirty log, or + * user wants to clear the dirty bitmap manually. + */ + if (shift && (iommu_hwdbm_dirty || iommu->dirty_log_get_no_clear)) bitmap_shift_right(dma->bitmap, dma->bitmap, shift, nbits + shift);
@@ -1285,6 +1359,10 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (ret) return ret;
+ /* Do not clear dirty automatically when require no clear */ + if (iommu->dirty_log_get_no_clear) + continue; + /* Clear iommu dirty log to re-enable dirty log tracking */ if (iommu->num_non_pinned_groups && dma->iommu_mapped && !iommu->num_non_hwdbm_domains) { @@ -2834,6 +2912,10 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, if (!iommu) return 0; return vfio_domains_have_enforce_cache_coherency(iommu); + case VFIO_DIRTY_LOG_MANUAL_CLEAR: + if (!iommu) + return 0; + return 1; default: return 0; } @@ -3082,7 +3164,9 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, struct vfio_iommu_type1_dirty_bitmap dirty; uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR | + VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP; unsigned long minsz; int ret = 0;
@@ -3124,7 +3208,9 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, } mutex_unlock(&iommu->lock); return 0; - } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + } else if (dirty.flags & (VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR | + VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP)) { struct vfio_iommu_type1_dirty_bitmap_get range; unsigned long pgshift; size_t data_size = dirty.argsz - minsz; @@ -3167,13 +3253,27 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, goto out_unlock; }
- if (iommu->dirty_page_tracking) + if (!iommu->dirty_page_tracking) { + ret = -EINVAL; + goto out_unlock; + } + + if (dirty.flags & (VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR)) { + + iommu->dirty_log_get_no_clear = !!(dirty.flags & + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR); + ret = vfio_iova_dirty_bitmap(range.bitmap.data, iommu, range.iova, range.size, range.bitmap.pgsize); - else - ret = -EINVAL; + } else { + ret = vfio_iova_dirty_log_clear(range.bitmap.data, + iommu, range.iova, + range.size, + range.bitmap.pgsize); + } out_unlock: mutex_unlock(&iommu->lock);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index afc1369216d9..2b78d03c0c15 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -56,6 +56,16 @@ */ #define VFIO_UPDATE_VADDR 10
+/* + * The vfio_iommu driver may support user clears dirty log manually, which means + * dirty log can be requested to not cleared automatically after dirty log is + * copied to userspace, it's user's duty to clear dirty log. + * + * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and + * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP. + */ +#define VFIO_DIRTY_LOG_MANUAL_CLEAR 11 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -1628,8 +1638,30 @@ struct vfio_iommu_type1_dma_unmap { * actual bitmap. If dirty pages logging is not enabled, an error will be * returned. * - * Only one of the flags _START, _STOP and _GET may be specified at a time. + * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost same as + * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that it requires underlying + * dirty bitmap is not cleared automatically. The user can clear it manually by + * calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set. * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set, + * instructs the IOMMU driver to clear the dirty status of pages in a bitmap + * for IOMMU container for a given IOVA range. The user must specify the IOVA + * range, the bitmap and the pgsize through the structure + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface + * supports clearing a bitmap of the smallest supported pgsize only and can be + * modified in future to clear a bitmap of any specified supported pgsize. The + * user must provide a memory area for the bitmap memory and specify its size + * in bitmap.size. One bit is used to represent one page consecutively starting + * from iova offset. The user should provide page size in bitmap.pgsize field. + * A bit set in the bitmap indicates that the page at that offset from iova is + * cleared the dirty status, and dirty tracking is re-enabled for that page. The + * caller must set argsz to a value including the size of structure + * vfio_iommu_dirty_bitmap_get, but excluing the size of the actual bitmap. If + * dirty pages logging is not enabled, an error will be returned. Note: user + * should clear dirty log before handle corresponding dirty pages. + * + * Only one of the flags _START, _STOP, _GET, _GET_NOCLEAR_, and _CLEAR may be + * specified at a time. */ struct vfio_iommu_type1_dirty_bitmap { __u32 argsz; @@ -1637,6 +1669,8 @@ struct vfio_iommu_type1_dirty_bitmap { #define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) #define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) #define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR (1 << 3) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP (1 << 4) __u8 data[]; };
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6FHYK CVE: NA
--------------------------------
For security purposes, kvzalloc is used to allocate zeroed memory. Because the memory may be backed by vmalloc, we also replace kfree with kvfree here, as sketched below.
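A minimal sketch of the allocation/free pairing this change restores; the function name and size parameter are illustrative:

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/errno.h>

static int example_with_bitmap(size_t bitmap_size)
{
	/* May fall back to vmalloc() for large bitmaps. */
	unsigned long *buf = kvzalloc(bitmap_size, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	/* ... copy the user bitmap in, walk and clear bits ... */

	kvfree(buf);	/* correct for both kmalloc- and vmalloc-backed memory */
	return 0;
}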
Reported-by: Zhaolong Wang wangzhaolong1@huawei.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index fc62430a73dd..88081c22e1c3 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1214,7 +1214,7 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, int ret = 0;
bitmap_size = DIRTY_BITMAP_BYTES(size >> pgshift); - bitmap_buffer = kvmalloc(bitmap_size, GFP_KERNEL); + bitmap_buffer = kvzalloc(bitmap_size, GFP_KERNEL); if (!bitmap_buffer) { ret = -ENOMEM; goto out; @@ -1264,7 +1264,7 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, }
out: - kfree(bitmap_buffer); + kvfree(bitmap_buffer); return ret; }
From: Jean-Philippe Brucker jean-philippe@linaro.org
maillist inclusion category: feature bugzilla: 51855 CVE: NA
Reference: https://jpbrucker.net/git/linux/commit/?h=sva/2021-03-01&id=9c63ea52c28e...
---------------------------------------------
If the SMMU supports it and the kernel was built with HTTU support, enable hardware update of access and dirty flags. This is essential for shared page tables, to reduce the number of access faults on the fault queue. Normal DMA with io-pgtables doesn't currently use the access or dirty flags.
We can enable HTTU even if the CPUs don't support it, because the kernel always checks for the HW dirty bit and updates the PTE flags atomically.
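For reference, a hedged sketch of how a hardware-updated dirty state could be read back from a stage-1 leaf descriptor; the macro names and bit positions (DBM at bit 51, AP[2], the read-only bit, at bit 7) are assumptions of this sketch, not code from this series. With HTTU the descriptor starts out writable-clean with AP[2] set, and the hardware clears AP[2] on the first write:

#include <linux/types.h>

#define EX_PTE_DBM		(((u64)1) << 51)	/* assumed DBM position */
#define EX_PTE_AP_RDONLY	(((u64)1) << 7)		/* AP[2], read-only bit */

/* Dirty means: writes are allowed to set it (DBM set) and one has already
 * happened (AP[2] cleared by hardware on the first write). */
static inline bool ex_pte_hw_dirty(u64 pte)
{
	return (pte & EX_PTE_DBM) && !(pte & EX_PTE_AP_RDONLY);
}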
Signed-off-by: Jean-Philippe Brucker jean-philippe@linaro.org Signed-off-by: Lijun Fang fanglijun3@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 41 ++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 15 +++++-- 3 files changed, 54 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 8a16cd3ef487..5bb805ca3e5d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -127,10 +127,12 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) if (err) goto out_free_asid;
+ /* HA and HD will be filtered out later if not supported by the SMMU */ tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) | FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) | FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) | FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) | + CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
switch (PAGE_SIZE) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4dc6fab6ff5a..83600e9b2e69 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1096,10 +1096,17 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, * this substream's traffic */ } else { /* (1) and (2) */ + u64 tcr = cd->tcr; + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); cdptr[2] = 0; cdptr[3] = cpu_to_le64(cd->mair);
+ if (!(smmu->features & ARM_SMMU_FEAT_HD)) + tcr &= ~CTXDESC_CD_0_TCR_HD; + if (!(smmu->features & ARM_SMMU_FEAT_HA)) + tcr &= ~CTXDESC_CD_0_TCR_HA; + /* * STE is live, and the SMMU might read dwords of this CD in any * order. Ensure that it observes valid values before reading @@ -1107,7 +1114,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, */ arm_smmu_sync_cd(smmu_domain, ssid, true);
- val = cd->tcr | + val = tcr | #ifdef __BIG_ENDIAN CTXDESC_CD_0_ENDI | #endif @@ -3747,6 +3754,28 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) } }
+static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) +{ + u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD); + u32 features = 0; + + switch (FIELD_GET(IDR0_HTTU, reg)) { + case IDR0_HTTU_ACCESS_DIRTY: + features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + features |= ARM_SMMU_FEAT_HA; + } + + if (smmu->dev->of_node) + smmu->features |= features; + else if (features != fw_features) + /* ACPI IORT sets the HTTU bits */ + dev_warn(smmu->dev, + "IDR0.HTTU overridden by FW configuration (0x%x)\n", + fw_features); +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -3807,6 +3836,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_E2H; }
+ arm_smmu_get_httu(smmu, reg); + /* * The coherency feature as set by FW is used in preference to the ID * register, but warn on mismatch. @@ -4013,6 +4044,14 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY;
+ switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) { + case IDR0_HTTU_ACCESS_DIRTY: + smmu->features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + smmu->features |= ARM_SMMU_FEAT_HA; + } + return 0; } #else diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 44771da77816..e0d6d8db5ba0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -33,6 +33,10 @@ #define IDR0_ASID16 (1 << 12) #define IDR0_ATS (1 << 10) #define IDR0_HYP (1 << 9) +#define IDR0_HTTU GENMASK(7, 6) +#define IDR0_HTTU_ACCESS 1 +#define IDR0_HTTU_ACCESS_DIRTY 2 +#define IDR0_BTM (1 << 5) #define IDR0_COHACC (1 << 4) #define IDR0_TTF GENMASK(3, 2) #define IDR0_TTF_AARCH64 2 @@ -295,6 +299,9 @@ #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
+#define CTXDESC_CD_0_TCR_HA (1UL << 43) +#define CTXDESC_CD_0_TCR_HD (1UL << 42) + #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) @@ -649,9 +656,11 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_BTM (1 << 16) #define ARM_SMMU_FEAT_SVA (1 << 17) #define ARM_SMMU_FEAT_E2H (1 << 18) -#define ARM_SMMU_FEAT_NESTING (1 << 19) -#define ARM_SMMU_FEAT_BBML1 (1 << 20) -#define ARM_SMMU_FEAT_BBML2 (1 << 21) +#define ARM_SMMU_FEAT_HA (1 << 19) +#define ARM_SMMU_FEAT_HD (1 << 20) +#define ARM_SMMU_FEAT_NESTING (1 << 21) +#define ARM_SMMU_FEAT_BBML1 (1 << 22) +#define ARM_SMMU_FEAT_BBML2 (1 << 23) u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK CVE: NA
------------------------------
As nested mode is not upstreamed yet, we only aim to support dirty log tracking for stage 1 with io-pgtable mapping (i.e. SVA mapping is not supported). If HTTU is supported, we enable the HA/HD bits in the SMMU CD and pass the ARM_HD quirk to io-pgtable.
Co-developed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 83600e9b2e69..f024093e5e4f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2131,6 +2131,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | + CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
@@ -2240,6 +2241,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_BBML1; else if (smmu->features & ARM_SMMU_FEAT_BBML2) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_BBML2; + if (smmu->features & ARM_SMMU_FEAT_HD) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops)
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UN39 CVE: NA
------------------------------
Two redundant arguments were added to 'iommu_device_register' in 'include/linux/iommu.h' by commit bbf3b39e5576 ("iommu: Introduce dirty log tracking framework"). As a result, compiling the kernel fails when CONFIG_IOMMU_API is disabled. Delete the two redundant arguments to fix this.
Fixes: bbf3b39e5576 ("iommu: Introduce dirty log tracking framework") Reported-by: Yejian Zheng zhengyejian1@huawei.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- include/linux/iommu.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c484ceda2c70..56a366e4e60e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -945,9 +945,7 @@ static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, return 0; }
-static inline int iommu_device_register(struct iommu_device *iommu, - const struct iommu_ops *ops, - struct device *hwdev) +static inline int iommu_device_register(struct iommu_device *iommu) { return -ENODEV; }
From: Keqian Zhu zhukeqian1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZUKK CVE: NA
------------------------------
Some types of IOMMU are capable of tracking a DMA dirty log, such as the ARM SMMU with HTTU or the Intel IOMMU with SLADE. This introduces a dirty log tracking framework in the IOMMU base layer.
Four new essential interfaces are added, and we maintain the dirty log tracking status in the iommu_domain.
1. iommu_support_dirty_log: Check whether a domain supports dirty log tracking
2. iommu_switch_dirty_log: Perform actions to start|stop dirty log tracking
3. iommu_sync_dirty_log: Sync the dirty log from the IOMMU into a dirty bitmap
4. iommu_clear_dirty_log: Clear the IOMMU dirty log by a mask bitmap
Note: Don't call these interfaces concurrently with other ops that access the underlying page table. A caller-side sketch follows.
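A caller-side sketch of the four interfaces; the helper, its parameters and the single-range flow are illustrative (a real user such as vfio iterates over its DMA ranges and takes its own locks):

#include <linux/iommu.h>
#include <linux/errno.h>

static int example_dirty_round(struct iommu_domain *domain,
			       unsigned long iova, size_t size,
			       unsigned long *bitmap, unsigned long pgshift,
			       int prot)
{
	int ret;

	if (!iommu_support_dirty_log(domain))
		return -EOPNOTSUPP;

	/* 1. Start tracking: write-protect (or split) the mappings. */
	ret = iommu_switch_dirty_log(domain, true, iova, size, prot);
	if (ret)
		return ret;

	/* 2. Periodically pull the hardware dirty log into the bitmap. */
	ret = iommu_sync_dirty_log(domain, iova, size, bitmap, iova, pgshift);
	if (ret)
		goto out_stop;

	/* 3. Re-arm tracking for exactly the pages reported dirty. */
	ret = iommu_clear_dirty_log(domain, iova, size, bitmap, iova, pgshift);

out_stop:
	/* 4. Stop tracking when the caller is done (e.g. migration ends). */
	iommu_switch_dirty_log(domain, false, iova, size, prot);
	return ret;
}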
Signed-off-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/iommu.c | 202 +++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 68 +++++++++++- include/trace/events/iommu.h | 63 +++++++++++ 3 files changed, 332 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7b2dc57f738d..4cd140bb3175 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1998,6 +1998,8 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, iommu_domain_free(domain); domain = NULL; } + mutex_init(&domain->switch_log_lock); + return domain; }
@@ -2746,6 +2748,206 @@ int iommu_set_pgtable_quirks(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
+bool iommu_support_dirty_log(struct iommu_domain *domain) +{ + const struct iommu_ops *ops = domain->ops; + + return ops->support_dirty_log && ops->support_dirty_log(domain); +} +EXPORT_SYMBOL_GPL(iommu_support_dirty_log); + +int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable, + unsigned long iova, size_t size, int prot) +{ + const struct iommu_ops *ops = domain->ops; + unsigned long orig_iova = iova; + unsigned int min_pagesz; + size_t orig_size = size; + bool flush = false; + int ret = 0; + + if (unlikely(!ops->switch_dirty_log)) + return -ENODEV; + + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); + if (!IS_ALIGNED(iova | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", + iova, size, min_pagesz); + return -EINVAL; + } + + mutex_lock(&domain->switch_log_lock); + if (enable && domain->dirty_log_tracking) { + ret = -EBUSY; + goto out; + } else if (!enable && !domain->dirty_log_tracking) { + ret = -EINVAL; + goto out; + } + + pr_debug("switch_dirty_log %s for: iova 0x%lx size 0x%zx\n", + enable ? "enable" : "disable", iova, size); + + while (size) { + size_t pgsize = iommu_pgsize(domain, iova, size); + + flush = true; + ret = ops->switch_dirty_log(domain, enable, iova, pgsize, prot); + if (ret) + break; + + pr_debug("switch_dirty_log handled: iova 0x%lx size 0x%zx\n", + iova, pgsize); + + iova += pgsize; + size -= pgsize; + } + + if (flush) + iommu_flush_iotlb_all(domain); + + if (!ret) { + domain->dirty_log_tracking = enable; + trace_switch_dirty_log(orig_iova, orig_size, enable); + } +out: + mutex_unlock(&domain->switch_log_lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_switch_dirty_log); + +int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova, + size_t size, unsigned long *bitmap, + unsigned long base_iova, unsigned long bitmap_pgshift) +{ + const struct iommu_ops *ops = domain->ops; + unsigned long orig_iova = iova; + unsigned int min_pagesz; + size_t orig_size = size; + int ret = 0; + + if (unlikely(!ops->sync_dirty_log)) + return -ENODEV; + + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); + if (!IS_ALIGNED(iova | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", + iova, size, min_pagesz); + return -EINVAL; + } + + mutex_lock(&domain->switch_log_lock); + if (!domain->dirty_log_tracking) { + ret = -EINVAL; + goto out; + } + + pr_debug("sync_dirty_log for: iova 0x%lx size 0x%zx\n", iova, size); + + while (size) { + size_t pgsize = iommu_pgsize(domain, iova, size); + + ret = ops->sync_dirty_log(domain, iova, pgsize, + bitmap, base_iova, bitmap_pgshift); + if (ret) + break; + + pr_debug("sync_dirty_log handled: iova 0x%lx size 0x%zx\n", + iova, pgsize); + + iova += pgsize; + size -= pgsize; + } + + if (!ret) + trace_sync_dirty_log(orig_iova, orig_size); +out: + mutex_unlock(&domain->switch_log_lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_sync_dirty_log); + +static int __iommu_clear_dirty_log(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + const struct iommu_ops *ops = domain->ops; + unsigned long orig_iova = iova; + size_t orig_size = size; + int ret = 0; + + if (unlikely(!ops->clear_dirty_log)) + return -ENODEV; + + pr_debug("clear_dirty_log for: iova 0x%lx size 0x%zx\n", iova, size); + + while (size) { + size_t pgsize = iommu_pgsize(domain, iova, size); + + ret = ops->clear_dirty_log(domain, iova, pgsize, bitmap, + base_iova, bitmap_pgshift); + if (ret) + break; + + 
pr_debug("clear_dirty_log handled: iova 0x%lx size 0x%zx\n", + iova, pgsize); + + iova += pgsize; + size -= pgsize; + } + + if (!ret) + trace_clear_dirty_log(orig_iova, orig_size); + + return ret; +} + +int iommu_clear_dirty_log(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift) +{ + unsigned long riova, rsize; + unsigned int min_pagesz; + bool flush = false; + int rs, re, start, end; + int ret = 0; + + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); + if (!IS_ALIGNED(iova | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx min_pagesz 0x%x\n", + iova, min_pagesz); + return -EINVAL; + } + + mutex_lock(&domain->switch_log_lock); + if (!domain->dirty_log_tracking) { + ret = -EINVAL; + goto out; + } + + start = (iova - base_iova) >> bitmap_pgshift; + end = start + (size >> bitmap_pgshift); + bitmap_for_each_set_region(bitmap, rs, re, start, end) { + flush = true; + riova = base_iova + (rs << bitmap_pgshift); + rsize = (re - rs) << bitmap_pgshift; + ret = __iommu_clear_dirty_log(domain, riova, rsize, bitmap, + base_iova, bitmap_pgshift); + if (ret) + break; + } + + if (flush) + iommu_flush_iotlb_all(domain); +out: + mutex_unlock(&domain->switch_log_lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_clear_dirty_log); + /** * iommu_get_resv_regions - get reserved regions * @dev: device for which to get reserved regions diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 56a366e4e60e..5413160ebbfb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -111,6 +111,8 @@ struct iommu_domain { int users; }; }; + bool dirty_log_tracking; + struct mutex switch_log_lock; };
static inline bool iommu_is_dma_domain(struct iommu_domain *domain) @@ -243,6 +245,10 @@ struct iommu_iotlb_gather { * is to support old IOMMU drivers, new drivers should use * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device + * @support_dirty_log: Check whether domain supports dirty log tracking + * @switch_dirty_log: Perform actions to start|stop dirty log tracking + * @sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap + * @clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu @@ -274,6 +280,22 @@ struct iommu_ops { void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev);
+ /* + * Track dirty log. Note: Don't concurrently call these interfaces with + * other ops that access underlying page table. + */ + bool (*support_dirty_log)(struct iommu_domain *domain); + int (*switch_dirty_log)(struct iommu_domain *domain, bool enable, + unsigned long iova, size_t size, int prot); + int (*sync_dirty_log)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift); + int (*clear_dirty_log)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift); + /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list);
@@ -538,6 +560,18 @@ int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks);
+extern bool iommu_support_dirty_log(struct iommu_domain *domain); +extern int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable, + unsigned long iova, size_t size, int prot); +extern int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova, + size_t size, unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift); +extern int iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long iova, + size_t dma_size, unsigned long *bitmap, + unsigned long base_iova, + unsigned long bitmap_pgshift); + void iommu_set_dma_strict(void);
extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, @@ -945,7 +979,39 @@ static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, return 0; }
-static inline int iommu_device_register(struct iommu_device *iommu) +static inline bool iommu_support_dirty_log(struct iommu_domain *domain) +{ + return false; +} + +static inline int iommu_switch_dirty_log(struct iommu_domain *domain, + bool enable, unsigned long iova, + size_t size, int prot) +{ + return -EINVAL; +} + +static inline int iommu_sync_dirty_log(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long pgshift) +{ + return -EINVAL; +} + +static inline int iommu_clear_dirty_log(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, + unsigned long base_iova, + unsigned long pgshift) +{ + return -EINVAL; +} + +static inline int iommu_device_register(struct iommu_device *iommu, + const struct iommu_ops *ops, + struct device *hwdev) { return -ENODEV; } diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 70743db1fb75..231e50f111ca 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -124,6 +124,69 @@ TRACE_EVENT(unmap, ) );
+TRACE_EVENT(switch_dirty_log, + + TP_PROTO(unsigned long iova, size_t size, bool enable), + + TP_ARGS(iova, size, enable), + + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + __field(bool, enable) + ), + + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + __entry->enable = enable; + ), + + TP_printk("IOMMU: iova=0x%016llx size=%zu enable=%u", + __entry->iova, __entry->size, __entry->enable + ) +); + +TRACE_EVENT(sync_dirty_log, + + TP_PROTO(unsigned long iova, size_t size), + + TP_ARGS(iova, size), + + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + ), + + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + ), + + TP_printk("IOMMU: iova=0x%016llx size=%zu", __entry->iova, + __entry->size) +); + +TRACE_EVENT(clear_dirty_log, + + TP_PROTO(unsigned long iova, size_t size), + + TP_ARGS(iova, size), + + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + ), + + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + ), + + TP_printk("IOMMU: iova=0x%016llx size=%zu", __entry->iova, + __entry->size) +); + DECLARE_EVENT_CLASS(iommu_error,
TP_PROTO(struct device *dev, unsigned long iova, int flags),
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I573P1 CVE: NA
------------------------------
Some parameters in iommu_clear_dirty_log/vfio_iova_dirty_log_clear that are used in bitmap operations have an incorrect type. In some cases this causes data overflow, as illustrated below.
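A small userspace demonstration of the overflow; the values are made up and an unsigned type is used so the wraparound is well defined for the demo. The bit index handed back by the bitmap walk is a 32-bit quantity, so shifting it by pgshift must be done in 64-bit arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int rs = 0x200000;	/* page index within the range */
	unsigned int pgshift = 12;

	/* Shift performed in 32-bit arithmetic: the high bits are lost. */
	unsigned long bad = rs << pgshift;
	/* Promote to 64 bits first, as the fix does. */
	unsigned long good = (unsigned long)rs << pgshift;

	/* On an LP64 build: bad=0x0 good=0x200000000 */
	printf("bad=0x%lx good=0x%lx\n", bad, good);
	return 0;
}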
Fixes: bbf3b39e5576b (iommu: Introduce dirty log tracking framework) Fixes: f1c9b9fa4fbc0 (vfio/iommu_type1: Add support for manual dirty log clear) Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/iommu.c | 7 +++---- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4cd140bb3175..40912af6c304 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2910,9 +2910,8 @@ int iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long bitmap_pgshift) { unsigned long riova, rsize; - unsigned int min_pagesz; + unsigned int min_pagesz, rs, re; bool flush = false; - int rs, re, start, end; int ret = 0;
min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -2932,8 +2931,8 @@ int iommu_clear_dirty_log(struct iommu_domain *domain, end = start + (size >> bitmap_pgshift); bitmap_for_each_set_region(bitmap, rs, re, start, end) { flush = true; - riova = base_iova + (rs << bitmap_pgshift); - rsize = (re - rs) << bitmap_pgshift; + riova = base_iova + ((unsigned long)rs << bitmap_pgshift); + rsize = (unsigned long)(re - rs) << bitmap_pgshift; ret = __iommu_clear_dirty_log(domain, riova, rsize, bitmap, base_iova, bitmap_pgshift); if (ret) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 88081c22e1c3..b7c9f4f3b796 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1210,7 +1210,7 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, unsigned long bitmap_size; unsigned long *bitmap_buffer = NULL; bool clear_valid; - int rs, re, start, end, dma_offset; + unsigned int rs, re, dma_offset; int ret = 0;
bitmap_size = DIRTY_BITMAP_BYTES(size >> pgshift); @@ -1242,7 +1242,7 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, end = (end_iova - iova) >> pgshift; bitmap_for_each_set_region(bitmap_buffer, rs, re, start, end) { clear_valid = true; - riova = iova + (rs << pgshift); + riova = iova + ((unsigned long)rs << pgshift); dma_offset = (riova - dma->iova) >> pgshift; bitmap_clear(dma->bitmap, dma_offset, re - rs); }
Conflict with 89d5b9601f70b72f524fdb7bc6cf1b1e8e20e867: pass the IOVA for the physical-address parameter as well, as in the original code.
Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/io-pgtable-arm.c | 2 +- drivers/iommu/iommu.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 404c87422338..4e5b1f4dd1a8 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -760,7 +760,7 @@ static size_t arm_lpae_do_split_blk(struct arm_lpae_io_pgtable *data, io_pgtable_tlb_flush_walk(&data->iop, iova, size, size); } /* Race does not exist */ - pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
/* Have splited it into page? */ if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 40912af6c304..0ebea4808212 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2789,7 +2789,7 @@ int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable, enable ? "enable" : "disable", iova, size);
while (size) { - size_t pgsize = iommu_pgsize(domain, iova, size); + size_t pgsize = iommu_pgsize(domain, iova, iova, size, NULL);
flush = true; ret = ops->switch_dirty_log(domain, enable, iova, pgsize, prot); @@ -2845,7 +2845,7 @@ int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova, pr_debug("sync_dirty_log for: iova 0x%lx size 0x%zx\n", iova, size);
while (size) { - size_t pgsize = iommu_pgsize(domain, iova, size); + size_t pgsize = iommu_pgsize(domain, iova, iova, size, NULL);
ret = ops->sync_dirty_log(domain, iova, pgsize, bitmap, base_iova, bitmap_pgshift); @@ -2884,7 +2884,7 @@ static int __iommu_clear_dirty_log(struct iommu_domain *domain, pr_debug("clear_dirty_log for: iova 0x%lx size 0x%zx\n", iova, size);
while (size) { - size_t pgsize = iommu_pgsize(domain, iova, size); + size_t pgsize = iommu_pgsize(domain, iova, iova, size, NULL);
ret = ops->clear_dirty_log(domain, iova, pgsize, bitmap, base_iova, bitmap_pgshift);
Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/io-pgtable-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4e5b1f4dd1a8..1372e4c9ddcc 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -250,7 +250,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, *ptep = pte;
if (!cfg->coherent_walk) - __arm_lpae_sync_pte(ptep, cfg); + __arm_lpae_sync_pte(ptep, 1, cfg); }
static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 ++--- drivers/iommu/iommu.c | 13 ++++--- drivers/vfio/vfio_iommu_type1.c | 5 ++- include/linux/iommu.h | 39 +++++++++++---------- 4 files changed, 32 insertions(+), 33 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f024093e5e4f..326bd2ed5541 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3101,10 +3101,6 @@ static struct iommu_ops arm_smmu_ops = { .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, - .support_dirty_log = arm_smmu_support_dirty_log, - .switch_dirty_log = arm_smmu_switch_dirty_log, - .sync_dirty_log = arm_smmu_sync_dirty_log, - .clear_dirty_log = arm_smmu_clear_dirty_log, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .remove_dev_pasid = arm_smmu_remove_dev_pasid, @@ -3125,6 +3121,10 @@ static struct iommu_ops arm_smmu_ops = { #endif .iova_to_phys = arm_smmu_iova_to_phys, .enable_nesting = arm_smmu_enable_nesting, + .support_dirty_log = arm_smmu_support_dirty_log, + .switch_dirty_log = arm_smmu_switch_dirty_log, + .sync_dirty_log = arm_smmu_sync_dirty_log, + .clear_dirty_log = arm_smmu_clear_dirty_log, .free = arm_smmu_domain_free, } }; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0ebea4808212..dbae8a71c0f0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2750,7 +2750,7 @@ EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
bool iommu_support_dirty_log(struct iommu_domain *domain) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops;
return ops->support_dirty_log && ops->support_dirty_log(domain); } @@ -2759,7 +2759,7 @@ EXPORT_SYMBOL_GPL(iommu_support_dirty_log); int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable, unsigned long iova, size_t size, int prot) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; @@ -2820,7 +2820,7 @@ int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova, size_t size, unsigned long *bitmap, unsigned long base_iova, unsigned long bitmap_pgshift) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; @@ -2873,7 +2873,7 @@ static int __iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long base_iova, unsigned long bitmap_pgshift) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; size_t orig_size = size; int ret = 0; @@ -2927,9 +2927,8 @@ int iommu_clear_dirty_log(struct iommu_domain *domain, goto out; }
- start = (iova - base_iova) >> bitmap_pgshift; - end = start + (size >> bitmap_pgshift); - bitmap_for_each_set_region(bitmap, rs, re, start, end) { + rs = (iova - base_iova) >> bitmap_pgshift; + for_each_set_bitrange_from(rs, re, bitmap, (size >> bitmap_pgshift)) { flush = true; riova = base_iova + ((unsigned long)rs << bitmap_pgshift); rsize = (unsigned long)(re - rs) << bitmap_pgshift; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index b7c9f4f3b796..cb7957a6dcb4 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1238,9 +1238,8 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, /* Similar logic as the tail of vfio_iova_dirty_bitmap */
clear_valid = false; - start = (start_iova - iova) >> pgshift; - end = (end_iova - iova) >> pgshift; - bitmap_for_each_set_region(bitmap_buffer, rs, re, start, end) { + rs = (start_iova - iova) >> pgshift; + for_each_set_bitrange(rs, re, bitmap_buffer, (end_iova - iova) >> pgshift) { clear_valid = true; riova = iova + ((unsigned long)rs << pgshift); dma_offset = (riova - dma->iova) >> pgshift; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5413160ebbfb..45734fd9cfd4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -245,10 +245,6 @@ struct iommu_iotlb_gather { * is to support old IOMMU drivers, new drivers should use * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device - * @support_dirty_log: Check whether domain supports dirty log tracking - * @switch_dirty_log: Perform actions to start|stop dirty log tracking - * @sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap - * @clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu @@ -280,21 +276,6 @@ struct iommu_ops { void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev);
- /* - * Track dirty log. Note: Don't concurrently call these interfaces with - * other ops that access underlying page table. - */ - bool (*support_dirty_log)(struct iommu_domain *domain); - int (*switch_dirty_log)(struct iommu_domain *domain, bool enable, - unsigned long iova, size_t size, int prot); - int (*sync_dirty_log)(struct iommu_domain *domain, - unsigned long iova, size_t size, - unsigned long *bitmap, unsigned long base_iova, - unsigned long bitmap_pgshift); - int (*clear_dirty_log)(struct iommu_domain *domain, - unsigned long iova, size_t size, - unsigned long *bitmap, unsigned long base_iova, - unsigned long bitmap_pgshift);
/* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); @@ -349,6 +330,10 @@ struct iommu_ops { * specific mechanisms. * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) + * @support_dirty_log: Check whether domain supports dirty log tracking + * @switch_dirty_log: Perform actions to start|stop dirty log tracking + * @sync_dirty_log: Sync dirty log from IOMMU into a dirty bitmap + * @clear_dirty_log: Clear dirty log of IOMMU by a mask bitmap * @free: Release the domain after use. */ struct iommu_domain_ops { @@ -381,6 +366,22 @@ struct iommu_domain_ops { int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks);
+ /* + * Track dirty log. Note: Don't concurrently call these interfaces with + * other ops that access underlying page table. + */ + bool (*support_dirty_log)(struct iommu_domain *domain); + int (*switch_dirty_log)(struct iommu_domain *domain, bool enable, + unsigned long iova, size_t size, int prot); + int (*sync_dirty_log)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift); + int (*clear_dirty_log)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long *bitmap, unsigned long base_iova, + unsigned long bitmap_pgshift); + void (*free)(struct iommu_domain *domain); };
From: Keqian Zhu zhukeqian1@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4WK5B CVE: NA
------------------------------
When a newly created iommu_domain is not HWDBM capable, the vfio_iommu's num_non_hwdbm_domains field should be incremented, but it was not. Fix this.
Fixes: d2373c56459f (vfio/iommu_type1: Add HWDBM status maintenance) Signed-off-by: Keqian Zhu zhukeqian1@huawei.com Tested-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Zenghui Yu yuzenghui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/vfio/vfio_iommu_type1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index cb7957a6dcb4..11f44270f4ff 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2349,7 +2349,7 @@ static void vfio_iommu_update_hwdbm(struct vfio_iommu *iommu, bool num_non_hwdbm_zeroed = false; bool log_enabled, should_enable;
- if (old_hwdbm && !new_hwdbm && attach) { + if ((old_hwdbm || singular) && !new_hwdbm && attach) { iommu->num_non_hwdbm_domains++; } else if (!old_hwdbm && new_hwdbm && !attach) { iommu->num_non_hwdbm_domains--;
From: Keqian Zhu zhukeqian1@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4WK5B CVE: NA
------------------------------
An iommu_domain may contain more than one DMA range, each of which can be dirty-log tracked separately, so it is hard to track a single dirty log status per iommu_domain. The upper layer (e.g. vfio) should make sure it is doing the right thing.
Fixes: bbf3b39e5576 (iommu: Introduce dirty log tracking framework) Signed-off-by: Keqian Zhu zhukeqian1@huawei.com Tested-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Zenghui Yu yuzenghui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: jiaqingtong jiaqingtong@huawei.com --- drivers/iommu/iommu.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index dbae8a71c0f0..32f0f6c461b3 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2777,13 +2777,6 @@ int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable, }
mutex_lock(&domain->switch_log_lock); - if (enable && domain->dirty_log_tracking) { - ret = -EBUSY; - goto out; - } else if (!enable && !domain->dirty_log_tracking) { - ret = -EINVAL; - goto out; - }
pr_debug("switch_dirty_log %s for: iova 0x%lx size 0x%zx\n", enable ? "enable" : "disable", iova, size); @@ -2806,11 +2799,9 @@ int iommu_switch_dirty_log(struct iommu_domain *domain, bool enable, if (flush) iommu_flush_iotlb_all(domain);
- if (!ret) { - domain->dirty_log_tracking = enable; + if (!ret) trace_switch_dirty_log(orig_iova, orig_size, enable); - } -out: + mutex_unlock(&domain->switch_log_lock); return ret; } @@ -2837,10 +2828,6 @@ int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova, }
mutex_lock(&domain->switch_log_lock); - if (!domain->dirty_log_tracking) { - ret = -EINVAL; - goto out; - }
pr_debug("sync_dirty_log for: iova 0x%lx size 0x%zx\n", iova, size);
@@ -2861,7 +2848,7 @@ int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
if (!ret) trace_sync_dirty_log(orig_iova, orig_size); -out: + mutex_unlock(&domain->switch_log_lock); return ret; } @@ -2922,10 +2909,6 @@ int iommu_clear_dirty_log(struct iommu_domain *domain, }
mutex_lock(&domain->switch_log_lock); - if (!domain->dirty_log_tracking) { - ret = -EINVAL; - goto out; - }
rs = (iova - base_iova) >> bitmap_pgshift; for_each_set_bitrange_from(rs, re, bitmap, (size >> bitmap_pgshift)) { @@ -2940,7 +2923,7 @@ int iommu_clear_dirty_log(struct iommu_domain *domain,
if (flush) iommu_flush_iotlb_all(domain); -out: + mutex_unlock(&domain->switch_log_lock); return ret; }