From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
Provide a free area cache for the share pool VA allocator, based on the algorithm used by mainline Linux commit 89699605fe7c.
This reduces the number of rbtree operations and linear traversals over the share pool extents needed to find a free area, by starting the search at the point where the last free area was found.
The free area cache is reset if areas are freed behind it, or if the search is for a different area (such as the DVPP 16G area) than last time. So allocation patterns are not changed.
After this patch, the search starts from where it left off, giving behaviour closer to amortized O(1).
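The effect of the cache can be illustrated with a small stand-alone sketch (illustration only, not the share pool code: the linked list, the alloc_va() helper and the VA range below are invented for the example, while the real allocator keeps struct sp_area nodes in an rbtree under sp_area_lock):

/* free-area-cache sketch: first fit over a sorted list of allocated areas */
#include <stdio.h>
#include <stdlib.h>

struct area {
	unsigned long va_start, va_end;
	struct area *next;			/* kept sorted by va_start */
};

#define VSTART	(1UL << 32)			/* illustrative VA range */
#define VEND	(VSTART + (16UL << 30))

static struct area *areas;			/* sorted list of allocations */
static struct area *cache;			/* area the last search ended at */
static unsigned long cached_hole_size;		/* biggest hole seen below cache */

static unsigned long alloc_va(unsigned long size)
{
	struct area *prev = NULL, *cur = areas, *new;
	unsigned long addr = VSTART;

	/* a hole below the cache could fit: restart the scan from VSTART */
	if (!cache || size < cached_hole_size) {
		cached_hole_size = 0;
		cache = NULL;
	}

	if (cache) {				/* resume right after the cache */
		addr = cache->va_end;
		prev = cache;
		cur = cache->next;
	}

	/* first fit: skip areas until a hole of at least 'size' is found */
	while (cur && addr + size > cur->va_start) {
		if (addr + cached_hole_size < cur->va_start)
			cached_hole_size = cur->va_start - addr;
		addr = cur->va_end;
		prev = cur;
		cur = cur->next;
	}
	if (addr + size > VEND)
		return 0;			/* address space exhausted */

	new = malloc(sizeof(*new));
	if (!new)
		return 0;
	new->va_start = addr;
	new->va_end = addr + size;
	new->next = cur;
	if (prev)
		prev->next = new;
	else
		areas = new;
	cache = new;				/* next search starts here */
	return addr;
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("allocated 2M at %#lx\n", alloc_va(2UL << 20));
	return 0;
}

Because each allocation resumes right after the area remembered in 'cache', filling the pool with N areas costs one step per allocation instead of walking all existing areas every time, which is what the tests below measure.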
Test environment: Huawei 1951 DC (8 CPU cores) with 21G of memory, no load.
Test method: a single-threaded process first calls sp_alloc() to allocate a specified number of 2M hugepages, then we measure the time taken by one more sp_alloc() of a 2M hugepage. The results are in microseconds (a sketch of the measurement loop follows the result tables).
test 1: first sp_alloc() 256 2M hugepages, 512M in total
test 2: first sp_alloc() 512 2M hugepages, 1G in total
test 3: first sp_alloc() 1024 2M hugepages, 2G in total
test 4: first sp_alloc() 1536 2M hugepages, 3G in total
test 5: first sp_alloc() 2048 2M hugepages, 4G in total
test 6: first sp_alloc() 4096 2M hugepages, 8G in total
test 7: first sp_alloc() 6072 2M hugepages, 12G in total
test 8: first sp_alloc() 8192 2M hugepages, 16G in total
For each test, the left column of a pair is the allocation time before this patch and the right column is the time with it; "res" is the reduction of the average allocation time.

        test1           test2           test3           test4
     before after    before after    before after    before after
       231   238       240   252       279   253       315   268
       242   238       247   253       282   255       326   265
       233   234       250   243       272   251       314   258
       239   224       245   246       273   261       324   262
       234   233       252   257       277   262       326   265
       225   231       243   243       279   249       325   264
       236   261       246   248       265   262       323   266
       233   238       247   246       281   259       331   265
       239   222       243   241       270   248       325   263
       241   231       239   241       335   246       321   268
avg: 235.3   235     245.2   247     281.3 254.6       323 264.4
res:         -               -             9.49%          18.14%

        test5           test6           test7           test8
     before after    before after    before after    before after
       371   280       720   458      1001   629      1547   909
       369   283       691   465      1005   718      1533   903
       374   279       954   470      1003   680      1371   908
       363   279       697   457      1004   923      1375   930
       369   286       711   464      1016   683      1395  1083
       382   280       967   491      1029   695      1413  1096
       378   284       688   823      1008   689      1419   905
       376   360       921   469      1285   696      1554  1085
       374   287       896   485      1030   682      1381   902
       380   276       706   545      1286   717      1606  1097
avg: 373.6 289.4     791.5 512.7     1066.7 717.5   1459.4 981.8
res:       22.54%          35.52%          32.74%         32.73%
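For reference, the shape of the measurement loop is roughly the following (illustration only: the real tests exercise the in-kernel sp_alloc() path, while time_extra_alloc_us() here is an invented helper that reuses the alloc_va() sketch above, so drop that sketch's main() when linking the two):

/* timing sketch: warm up with N allocations, then time one extra 2M alloc */
#include <stdio.h>
#include <time.h>

unsigned long alloc_va(unsigned long size);	/* from the sketch above */

static long time_extra_alloc_us(int nr_warmup)
{
	struct timespec t0, t1;
	int i;

	for (i = 0; i < nr_warmup; i++)
		alloc_va(2UL << 20);		/* e.g. 256 for test 1 */

	clock_gettime(CLOCK_MONOTONIC, &t0);
	alloc_va(2UL << 20);			/* the allocation being timed */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	return (t1.tv_sec - t0.tv_sec) * 1000000L +
	       (t1.tv_nsec - t0.tv_nsec) / 1000L;
}

int main(void)
{
	printf("one extra 2M allocation: %ld us\n", time_extra_alloc_us(256));
	return 0;
}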
Suggested-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 104 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 75 insertions(+), 29 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 24c5dd680451..b9d30d36b7c2 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -730,6 +730,11 @@ static void __insert_sp_area(struct sp_area *spa)
 	rb_insert_color(&spa->rb_node, &sp_area_root);
 }
 
+/* The sp_area cache globals are protected by sp_area_lock */
+static struct rb_node *free_sp_area_cache;
+static unsigned long cached_hole_size;
+static unsigned long cached_vstart;	/* affected by SP_DVPP and sp_config_dvpp_range() */
+
 /*
  * Allocate a region of VA from the share pool.
  * @size - the size of VA to allocate
@@ -741,7 +746,7 @@ static void __insert_sp_area(struct sp_area *spa)
 static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 				     struct sp_group *spg, enum spa_type type)
 {
-	struct sp_area *spa, *err;
+	struct sp_area *spa, *first, *err;
 	struct rb_node *n;
 	unsigned long vstart = MMAP_SHARE_POOL_START;
 	unsigned long vend = MMAP_SHARE_POOL_16G_START;
@@ -763,8 +768,6 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 		}
 	}
 
-	addr = vstart;
-
 	spa = kmalloc(sizeof(struct sp_area), GFP_KERNEL);
 	if (unlikely(!spa)) {
 		if (printk_ratelimit())
@@ -774,45 +777,75 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 
 	spin_lock(&sp_area_lock);
 
-	n = sp_area_root.rb_node;
-	if (n) {
-		struct sp_area *first = NULL;
+	/*
+	 * Invalidate cache if we have more permissive parameters.
+	 * cached_hole_size notes the largest hole noticed _below_
+	 * the sp_area cached in free_sp_area_cache: if size fits
+	 * into that hole, we want to scan from vstart to reuse
+	 * the hole instead of allocating above free_sp_area_cache.
+	 * Note that sp_free_area may update free_sp_area_cache
+	 * without updating cached_hole_size.
+	 */
+	if (!free_sp_area_cache || size_align < cached_hole_size ||
+	    vstart != cached_vstart) {
+		cached_hole_size = 0;
+		free_sp_area_cache = NULL;
+	}
+
+	/* record if we encounter less permissive parameters */
+	cached_vstart = vstart;
+
+	/* find starting point for our search */
+	if (free_sp_area_cache) {
+		first = rb_entry(free_sp_area_cache, struct sp_area, rb_node);
+		addr = first->va_end;
+		if (addr + size_align < addr) {
+			err = ERR_PTR(-EOVERFLOW);
+			goto error;
+		}
+	} else {
+		addr = vstart;
+		if (addr + size_align < addr) {
+			err = ERR_PTR(-EOVERFLOW);
+			goto error;
+		}
+
+		n = sp_area_root.rb_node;
+		first = NULL;
 
-		do {
+		while (n) {
 			struct sp_area *tmp;
 			tmp = rb_entry(n, struct sp_area, rb_node);
 			if (tmp->va_end >= addr) {
-				if (!first && tmp->va_start < addr + size_align)
-					first = tmp;
-				n = n->rb_left;
-			} else {
 				first = tmp;
+				if (tmp->va_start <= addr)
+					break;
+				n = n->rb_left;
+			} else
 				n = n->rb_right;
-			}
-		} while (n);
+		}
 
 		if (!first)
 			goto found;
+	}
 
-		if (first->va_end < addr) {
-			n = rb_next(&first->rb_node);
-			if (n)
-				first = rb_entry(n, struct sp_area, rb_node);
-			else
-				goto found;
+	/* from the starting point, traverse areas until a suitable hole is found */
+	while (addr + size_align > first->va_start && addr + size_align <= vend) {
+		if (addr + cached_hole_size < first->va_start)
+			cached_hole_size = first->va_start - addr;
+		addr = first->va_end;
+		if (addr + size_align < addr) {
+			err = ERR_PTR(-EOVERFLOW);
+			goto error;
 		}
 
-		while (addr + size_align >= first->va_start &&
-		       addr + size_align <= vend) {
-			addr = first->va_end;
-
-			n = rb_next(&first->rb_node);
-			if (n)
-				first = rb_entry(n, struct sp_area, rb_node);
-			else
-				goto found;
-		}
+		n = rb_next(&first->rb_node);
+		if (n)
+			first = rb_entry(n, struct sp_area, rb_node);
+		else
+			goto found;
 	}
+
 found:
 	if (addr + size_align > vend) {
 		err = ERR_PTR(-EOVERFLOW);
@@ -833,6 +866,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 	}
 
 	__insert_sp_area(spa);
+	free_sp_area_cache = &spa->rb_node;
 	if (spa->spg) {
 		atomic_inc(&spg->spa_num);
 		atomic64_add(size, &spg->size);
@@ -889,6 +923,18 @@ static void sp_free_area(struct sp_area *spa)
 {
 	lockdep_assert_held(&sp_area_lock);
 
+	if (free_sp_area_cache) {
+		struct sp_area *cache;
+		cache = rb_entry(free_sp_area_cache, struct sp_area, rb_node);
+		if (spa->va_start <= cache->va_start) {
+			free_sp_area_cache = rb_prev(&spa->rb_node);
+			/*
+			 * We don't try to update cached_hole_size,
+			 * but it won't go very wrong.
+			 */
+		}
+	}
+
 	spa_dec_usage(spa->type, spa->real_size);	/* won't fail */
 	if (spa->spg) {
 		atomic_dec(&spa->spg->spa_num);