Some scenarios are sensitive to the efficiency of allocating IOVAs from the iova_rcache. Commit 911aa1245da8 ("iommu/iova: Make the rcache depot scale better") changed the format of the depot from a fixed-length array to a list, to make the depot scale better on machines with many cores. However, list operations are not as efficient as array operations. In some test cases, such as IP forwarding, where allocating IOVAs from the iova_rcache can become a bottleneck, the overhead of these list operations shows up as a performance decrease. This can be observed as higher occupancy of queue_iova() and __iova_rcache_get() in perf top.
Operating on iova_rcache->depot as a list:
14.47% [kernel] [k] queue_iova 12.99% [kernel] [k] __iova_rcache_get 8.22% [kernel] [k] __arm_lpae_unmap 6.98% [kernel] [k] arm_lpae_map 6.20% [kernel] [k] native_queued_spin_unlock 4.63% [kernel] [k] __arm_lpae_map 2.56% [kernel] [k] __dev_queue_xmit
Operating on iova_rcache->depot as an array:
10.50% [kernel] [k] __arm_lpae_unmap 7.80% [kernel] [k] native_queued_spin_unlock 7.23% [kernel] [k] queue_iova 7.16% [kernel] [k] arm_lpae_map 5.90% [kernel] [k] __arm_lpae_map 3.67% [kernel] [k] __iova_rcache_get 3.20% [kernel] [k] __dev_queue_xmit
To make the depot operations more efficient, introduce an array buffer that is used before falling back to the depot list. The length of the array is set to NR_CPUS, which should be enough for most workloads. The iova_rcache will first try to use magazines in the array part of the depot. Only if there are too many magazines to fit in the array will the depot list be used.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/iommu/iova.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d59d0ea2fd21..220256dde24f 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -563,6 +563,7 @@ EXPORT_SYMBOL_GPL(reserve_iova); * we don't need to waste PFN capacity on a separate list head either. */ #define IOVA_MAG_SIZE 127 +#define IOVA_DEPOT_POOL_LEN NR_CPUS
#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)
@@ -581,10 +582,15 @@ struct iova_cpu_rcache { struct iova_magazine *prev; };
+struct iova_rcache_depot { + struct iova_magazine *pool[IOVA_DEPOT_POOL_LEN]; + struct iova_magazine *list; +}; + struct iova_rcache { spinlock_t lock; unsigned int depot_size; - struct iova_magazine *depot; + struct iova_rcache_depot *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; struct iova_domain *iovad; struct delayed_work work; @@ -671,9 +677,14 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) { - struct iova_magazine *mag = rcache->depot; + struct iova_rcache_depot *depot = rcache->depot; + struct iova_magazine *mag;
- rcache->depot = mag->next; + if (rcache->depot_size <= IOVA_DEPOT_POOL_LEN) + return depot->pool[--rcache->depot_size]; + + mag = depot->list; + depot->list = mag->next; mag->size = IOVA_MAG_SIZE; rcache->depot_size--; return mag; @@ -681,8 +692,15 @@ static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag) { - mag->next = rcache->depot; - rcache->depot = mag; + struct iova_rcache_depot *depot = rcache->depot; + + if (rcache->depot_size < IOVA_DEPOT_POOL_LEN) { + depot->pool[rcache->depot_size++] = mag; + return; + } + + mag->next = depot->list; + depot->list = mag; rcache->depot_size++; }
@@ -729,6 +747,11 @@ int iova_domain_init_rcaches(struct iova_domain *iovad) ret = -ENOMEM; goto out_err; } + rcache->depot = kzalloc(sizeof(struct iova_rcache_depot), GFP_KERNEL); + if (!rcache->depot) { + ret = -ENOMEM; + goto out_err; + } for_each_possible_cpu(cpu) { cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
@@ -832,7 +855,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, has_pfn = true; } else { spin_lock(&rcache->lock); - if (rcache->depot) { + if (rcache->depot_size > 0) { iova_magazine_free(cpu_rcache->loaded); cpu_rcache->loaded = iova_depot_pop(rcache); has_pfn = true; @@ -885,8 +908,9 @@ static void free_iova_rcaches(struct iova_domain *iovad) } free_percpu(rcache->cpu_rcaches); cancel_delayed_work_sync(&rcache->work); - while (rcache->depot) + while (rcache->depot_size > 0) iova_magazine_free(iova_depot_pop(rcache)); + kfree(rcache->depot); }
kfree(iovad->rcaches); @@ -924,7 +948,7 @@ static void free_global_cached_iovas(struct iova_domain *iovad) for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; spin_lock_irqsave(&rcache->lock, flags); - while (rcache->depot) { + while (rcache->depot_size > 0) { struct iova_magazine *mag = iova_depot_pop(rcache);
iova_magazine_free_pfns(mag, iovad);