Make the rache depot scale better. The origin depot size is fixed, may cause problem in some condition.
Add another patch to fix smmu problem.
v2->v3: - Add a smmu bugfix patch
v1->v2: - Change the commit messge
Robin Murphy (2): iommu/iova: Make the rcache depot scale better iommu/iova: Manage the depot list size
Wenkai Lin (1): iommu/arm-smmu-v3: disable stall for quiet_cd
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 + drivers/iommu/iova.c | 95 ++++++++++++++------- 2 files changed, 68 insertions(+), 30 deletions(-)
From: Robin Murphy robin.murphy@arm.com
mainline inclusion from mainline-v6.7-rc1 commit 911aa1245da83ff5e76d33bb612d8b5a3f2ec4a5 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8PJ8L
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------------
The algorithm in the original paper specifies the storage of full magazines in the depot as an unbounded list rather than a fixed-size array. It turns out to be pretty straightforward to do this in our implementation with no significant loss of efficiency. This allows the depot to scale up to the working set sizes of larger systems, while also potentially saving some memory on smaller ones too.
Since this involves touching struct iova_magazine with the requisite care, we may as well reinforce the comment with a proper assertion too.
Reviewed-by: John Garry john.g.garry@oracle.com Reviewed-by: Jerry Snitselaar jsnitsel@redhat.com Signed-off-by: Robin Murphy robin.murphy@arm.com Link: https://lore.kernel.org/r/f597aa72fc3e1d315bc4574af0ce0ebe5c31cd22.169453558... Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/iommu/iova.c | 65 ++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 30 deletions(-)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 10b964600948..dd2309e9a6c5 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -622,15 +622,19 @@ EXPORT_SYMBOL_GPL(reserve_iova); /* * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to * assure size of 'iova_magazine' to be 1024 bytes, so that no memory - * will be wasted. + * will be wasted. Since only full magazines are inserted into the depot, + * we don't need to waste PFN capacity on a separate list head either. */ #define IOVA_MAG_SIZE 127 -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */
struct iova_magazine { - unsigned long size; + union { + unsigned long size; + struct iova_magazine *next; + }; unsigned long pfns[IOVA_MAG_SIZE]; }; +static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
struct iova_cpu_rcache { spinlock_t lock; @@ -640,8 +644,7 @@ struct iova_cpu_rcache {
struct iova_rcache { spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; };
@@ -717,6 +720,21 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) mag->pfns[mag->size++] = pfn; }
+static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) +{ + struct iova_magazine *mag = rcache->depot; + + rcache->depot = mag->next; + mag->size = IOVA_MAG_SIZE; + return mag; +} + +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag) +{ + mag->next = rcache->depot; + rcache->depot = mag; +} + int iova_domain_init_rcaches(struct iova_domain *iovad) { unsigned int cpu; @@ -734,7 +752,6 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); - rcache->depot_size = 0; rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (!rcache->cpu_rcaches) { @@ -776,7 +793,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, struct iova_rcache *rcache, unsigned long iova_pfn) { - struct iova_magazine *mag_to_free = NULL; struct iova_cpu_rcache *cpu_rcache; bool can_insert = false; unsigned long flags; @@ -794,12 +810,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
if (new_mag) { spin_lock(&rcache->lock); - if (rcache->depot_size < MAX_GLOBAL_MAGS) { - rcache->depot[rcache->depot_size++] = - cpu_rcache->loaded; - } else { - mag_to_free = cpu_rcache->loaded; - } + iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock);
cpu_rcache->loaded = new_mag; @@ -812,11 +823,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
spin_unlock_irqrestore(&cpu_rcache->lock, flags);
- if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); - iova_magazine_free(mag_to_free); - } - return can_insert; }
@@ -854,9 +860,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, has_pfn = true; } else { spin_lock(&rcache->lock); - if (rcache->depot_size > 0) { + if (rcache->depot) { iova_magazine_free(cpu_rcache->loaded); - cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + cpu_rcache->loaded = iova_depot_pop(rcache); has_pfn = true; } spin_unlock(&rcache->lock); @@ -895,9 +901,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) struct iova_rcache *rcache; struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; - int i, j;
- for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; if (!rcache->cpu_rcaches) break; @@ -907,8 +912,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) - iova_magazine_free(rcache->depot[j]); + while (rcache->depot) + iova_magazine_free(iova_depot_pop(rcache)); }
kfree(iovad->rcaches); @@ -942,16 +947,16 @@ static void free_global_cached_iovas(struct iova_domain *iovad) { struct iova_rcache *rcache; unsigned long flags; - int i, j;
- for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; spin_lock_irqsave(&rcache->lock, flags); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + while (rcache->depot) { + struct iova_magazine *mag = iova_depot_pop(rcache); + + iova_magazine_free_pfns(mag, iovad); + iova_magazine_free(mag); } - rcache->depot_size = 0; spin_unlock_irqrestore(&rcache->lock, flags); } }
From: Robin Murphy robin.murphy@arm.com
mainline inclusion from mainline-v6.7-rc1 commit 233045378dbbc0a7346127d19a54d4f91e0bd855 category: bugfix bugzilla: bugzilla: https://gitee.com/openeuler/kernel/issues/I8PJ8L
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------------------------
Automatically scaling the depot up to suit the peak capacity of a workload is all well and good, but it would be nice to have a way to scale it back down again if the workload changes. To that end, add backround reclaim that will gradually free surplus magazines if the depot size remains above a reasonable threshold for long enough.
Reviewed-by: Jerry Snitselaar jsnitsel@redhat.com Signed-off-by: Robin Murphy robin.murphy@arm.com Link: https://lore.kernel.org/r/03170665c56d89c6ce6081246b47f68d4e483308.169453558... Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/iommu/iova.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index dd2309e9a6c5..d30e453d0fb4 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/bitops.h> #include <linux/cpu.h> +#include <linux/workqueue.h>
/* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL @@ -627,6 +628,8 @@ EXPORT_SYMBOL_GPL(reserve_iova); */ #define IOVA_MAG_SIZE 127
+#define IOVA_DEPOT_DELAY msecs_to_jiffies(100) + struct iova_magazine { union { unsigned long size; @@ -644,8 +647,11 @@ struct iova_cpu_rcache {
struct iova_rcache { spinlock_t lock; + unsigned int depot_size; struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; + struct iova_domain *iovad; + struct delayed_work work; };
static struct iova_magazine *iova_magazine_alloc(gfp_t flags) @@ -726,6 +732,7 @@ static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
rcache->depot = mag->next; mag->size = IOVA_MAG_SIZE; + rcache->depot_size--; return mag; }
@@ -733,6 +740,25 @@ static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *ma { mag->next = rcache->depot; rcache->depot = mag; + rcache->depot_size++; +} + +static void iova_depot_work_func(struct work_struct *work) +{ + struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work); + struct iova_magazine *mag = NULL; + unsigned long flags; + + spin_lock_irqsave(&rcache->lock, flags); + if (rcache->depot_size > num_online_cpus()) + mag = iova_depot_pop(rcache); + spin_unlock_irqrestore(&rcache->lock, flags); + + if (mag) { + iova_magazine_free_pfns(mag, rcache->iovad); + iova_magazine_free(mag); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); + } }
int iova_domain_init_rcaches(struct iova_domain *iovad) @@ -752,6 +778,8 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); + rcache->iovad = iovad; + INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func); rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (!rcache->cpu_rcaches) { @@ -812,6 +840,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, spin_lock(&rcache->lock); iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
cpu_rcache->loaded = new_mag; can_insert = true; @@ -912,6 +941,7 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } free_percpu(rcache->cpu_rcaches); + cancel_delayed_work_sync(&rcache->work); while (rcache->depot) iova_magazine_free(iova_depot_pop(rcache)); }
From: Wenkai Lin linwenkai6@hisilicon.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8PSC6
Reference: https://patchwork.kernel.org/project/kvm/patch/20231206005727.46150-1-zhangf...
----------------------------------------------
In the stall model, invalid transactions were expected to be stalled and aborted by the IOPF handler.
However, when killing a test case with a huge amount of data, the accelerator streamline can not stop until all data is consumed even if the page fault handler reports errors. As a result, the kill may take a long time, about 10 seconds with numerous iopf interrupts.
So disable stall for quiet_cd in the non-force stall model, since force stall model (STALL_MODEL==0b10) requires CD.S must be 1.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org Signed-off-by: Wenkai Lin linwenkai6@hisilicon.com Suggested-by: Jean-Philippe Brucker jean-philippe@linaro.org Reviewed-by: Jason Gunthorpe jgg@nvidia.com Reviewed-by: Jean-Philippe Brucker jean-philippe@linaro.org Link: https://lore.kernel.org/r/20231206005727.46150-1-zhangfei.gao@linaro.org Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4d417b4243eb..5055a66644af 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1070,6 +1070,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, u64 val; bool cd_live; __le64 *cdptr; + struct arm_smmu_device *smmu = smmu_domain->smmu;
if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) return -E2BIG; @@ -1084,6 +1085,8 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, if (!cd) { /* (5) */ val = 0; } else if (cd == &quiet_cd) { /* (4) */ + if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); val |= CTXDESC_CD_0_TCR_EPD0; } else if (cd_live) { /* (3) */ val &= ~CTXDESC_CD_0_ASID;
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3486 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3486 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...