From: Shameer Kolothum shameerali.kolothum.thodi@huawei.com
mainline inclusion
from mainline-v6.11-rc1
commit 4fe88fd8b4aecb7f9680bf898811db76b94095a9
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IB4WDJ
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4fe88fd8b4aecb7f9680bf898811db76b94095a9
--------------------------------
The .read_and_clear_dirty() IOMMU domain op reads the dirty bits (a PTE is dirty when it has DBM set and AP[2] clear) and marshals them into a bitmap of a given page size.
While reading the dirty bits, we also set the PTE AP[2] bit to mark the entry as writeable-clean, unless the caller passes IOMMU_DIRTY_NO_CLEAR in the read_and_clear_dirty() flags.
PTE states with respect to DBM bit:

                       DBM bit        AP[2]("RDONLY" bit)
1. writable_clean        1                 1
2. writable_dirty        1                 0
3. read-only             0                 1
Reviewed-by: Ryan Roberts ryan.roberts@arm.com
Reviewed-by: Jason Gunthorpe jgg@nvidia.com
Reviewed-by: Kevin Tian kevin.tian@intel.com
Signed-off-by: Shameer Kolothum shameerali.kolothum.thodi@huawei.com
Link: https://lore.kernel.org/r/20240703101604.2576-4-shameerali.kolothum.thodi@hu...
Signed-off-by: Will Deacon will@kernel.org
Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com
---
 drivers/iommu/io-pgtable-arm.c | 116 +++++++++++++++++++++++++++++++--
 1 file changed, 111 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 07c04322bc58..5e89e572b890 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -87,7 +87,7 @@
#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) /* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << 51) +#define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ @@ -95,7 +95,11 @@
/* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) -#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \ + ARM_LPAE_PTE_AP_RDONLY_BIT) +#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \ + ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11)
@@ -141,6 +145,12 @@
#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)
+#define iopte_writeable_dirty(pte) \ + (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) + +#define iopte_set_writeable_clean(ptep) \ + set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep)) + struct arm_lpae_io_pgtable { struct io_pgtable iop;
@@ -162,6 +172,13 @@ static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK; }
+static inline bool iopte_table(arm_lpae_iopte pte, int lvl) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) + return false; + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE; +} + static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, struct arm_lpae_io_pgtable *data) { @@ -1044,9 +1061,6 @@ static int __arm_lpae_clear_dirty_log(struct arm_lpae_io_pgtable *data, size_t base, next_size; int nbits, ret, i;
- if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) - return -EINVAL; - ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); pte = READ_ONCE(*ptep); if (WARN_ON(!pte)) @@ -1123,6 +1137,97 @@ static int arm_lpae_clear_dirty_log(struct io_pgtable_ops *ops, bitmap, base_iova, bitmap_pgshift); }
+struct io_pgtable_walk_data { + struct iommu_dirty_bitmap *dirty; + unsigned long flags; + u64 addr; + const u64 end; +}; + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); + +static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) +{ + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte pte = READ_ONCE(*ptep); + + if (iopte_leaf(pte, lvl, iop->fmt)) { + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + if (iopte_writeable_dirty(pte)) { + iommu_dirty_bitmap_record(walk_data->dirty, + walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + walk_data->addr += size; + return 0; + } + + if (WARN_ON(!iopte_table(pte, lvl))) + return -EINVAL; + + ptep = iopte_deref(pte, data); + return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); +} + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) +{ + u32 idx; + int max_entries, ret; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; + + if (lvl == data->start_level) + max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); + else + max_entries = ARM_LPAE_PTES_PER_TABLE(data); + + for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); + (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { + ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); + if (ret) + return ret; + } + + return 0; +} + +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct io_pgtable_walk_data walk_data = { + .dirty = dirty, 
+ .flags = flags, + .addr = iova, + .end = iova + size, + }; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + + if (WARN_ON(!size)) + return -EINVAL; + if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1))) + return -EINVAL; + if (data->iop.fmt != ARM_64_LPAE_S1) + return -EINVAL; + + return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -1205,6 +1310,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .merge_page = arm_lpae_merge_page, .sync_dirty_log = arm_lpae_sync_dirty_log, .clear_dirty_log = arm_lpae_clear_dirty_log, + .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, };
return data;