From: Kairui Song <kasong@tencent.com>
mainline inclusion
from mainline-v6.12-rc1
commit 650975d2b181e30c9017c42cb3f6535287555b1e
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IBC5I1
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Currently, when we are freeing mTHP folios from the swap cache, we free them one by one and put each entry into the swap slot cache. The slot cache is designed to reduce overhead by batching the freeing, but mTHP swap entries are already contiguous, so they can be batch freed without it; going through the slot cache saves little overhead, or even increases overhead for larger mTHPs.
What's more, mTHP entries can stay in the swap cache for a while. Contiguous swap entries are a rather rare resource, so releasing them directly helps improve the mTHP allocation success rate under memory pressure.
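As an aside for reviewers, the batched path boils down to "clear a contiguous run of swap_map bytes, then drop the cluster usage count once". Below is a minimal userspace C sketch of that idea, purely illustrative and not part of the patch; toy_cluster, toy_free_range and CLUSTER_SIZE are invented names, not kernel API:

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	#define CLUSTER_SIZE 512	/* slots per cluster, like SWAPFILE_CLUSTER */

	struct toy_cluster {
		unsigned char map[CLUSTER_SIZE];	/* nonzero = slot in use */
		unsigned int count;			/* slots currently in use */
	};

	/* Batched free: one pass over the run, one counter update. */
	static void toy_free_range(struct toy_cluster *c, unsigned int off,
				   unsigned int nr)
	{
		assert(c->count >= nr);
		for (unsigned int i = off; i < off + nr; i++) {
			assert(c->map[i]);	/* mirrors VM_BUG_ON on the map byte */
			c->map[i] = 0;
		}
		c->count -= nr;			/* one decrement for the whole run */
		if (!c->count)
			printf("cluster empty, release it to the free list\n");
	}

	int main(void)
	{
		struct toy_cluster c = { .count = 16 };

		memset(c.map, 1, 16);		/* a 16-slot mTHP occupies slots 0..15 */
		toy_free_range(&c, 0, 16);	/* one batched call instead of 16 */
		return 0;
	}

Compared with pushing the 16 entries through the slot cache one by one, the run is released in a single pass under a single lock hold, which is the effect swap_entry_range_free() achieves in the diff below.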
Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-5-cb9c148b9297@kernel.o...
Signed-off-by: Kairui Song <kasong@tencent.com>
Reported-by: Barry Song <21cnbao@gmail.com>
Acked-by: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Conflicts:
	mm/swapfile.c
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 mm/swapfile.c | 59 +++++++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 33 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4be5fbbdc1c8..44726e0b8f8f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -478,20 +478,21 @@ static void inc_cluster_info_page(struct swap_info_struct *p,
 }
 
 /*
- * The cluster ci decreases one usage. If the usage counter becomes 0,
+ * The cluster ci decreases @nr_pages usage. If the usage counter becomes 0,
  * which means no page in the cluster is in use, we can optionally discard
  * the cluster and add it to free cluster list.
  */
-static void dec_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *ci)
+static void dec_cluster_info_page(struct swap_info_struct *p,
+				  struct swap_cluster_info *ci, int nr_pages)
 {
 	if (!p->cluster_info)
 		return;
 
-	VM_BUG_ON(ci->count == 0);
+	VM_BUG_ON(ci->count < nr_pages);
 	VM_BUG_ON(cluster_is_free(ci));
 	lockdep_assert_held(&p->lock);
 	lockdep_assert_held(&ci->lock);
-	ci->count--;
+	ci->count -= nr_pages;
 
 	if (!ci->count) {
 		free_cluster(p, ci);
@@ -983,19 +984,6 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
 	return n_ret;
 }
 
-static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
-{
-	unsigned long offset = idx * SWAPFILE_CLUSTER;
-	struct swap_cluster_info *ci;
-
-	ci = lock_cluster(si, offset);
-	memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER);
-	ci->count = 0;
-	free_cluster(si, ci);
-	unlock_cluster(ci);
-	swap_range_free(si, offset, SWAPFILE_CLUSTER);
-}
-
 #ifdef CONFIG_MEMCG_SWAP_QOS
 int write_swapfile_for_memcg(struct address_space *mapping, int *swap_type)
 {
@@ -1343,21 +1331,28 @@ static unsigned char __swap_entry_free(struct swap_info_struct *p,
 	return usage;
 }
 
-static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
+/*
+ * Drop the last HAS_CACHE flag of swap entries, caller have to
+ * ensure all entries belong to the same cgroup.
+ */
+static void swap_entry_range_free(struct swap_info_struct *p, swp_entry_t entry,
+				  unsigned int nr_pages)
 {
-	struct swap_cluster_info *ci;
 	unsigned long offset = swp_offset(entry);
-	unsigned char count;
+	unsigned char *map = p->swap_map + offset;
+	unsigned char *map_end = map + nr_pages;
+	struct swap_cluster_info *ci;
 
 	ci = lock_cluster(p, offset);
-	count = p->swap_map[offset];
-	VM_BUG_ON(count != SWAP_HAS_CACHE);
-	p->swap_map[offset] = 0;
-	dec_cluster_info_page(p, ci);
+	do {
+		VM_BUG_ON(*map != SWAP_HAS_CACHE);
+		*map = 0;
+	} while (++map < map_end);
+	dec_cluster_info_page(p, ci, nr_pages);
 	unlock_cluster(ci);
 
-	mem_cgroup_uncharge_swap(entry, 1);
-	swap_range_free(p, offset, 1);
+	mem_cgroup_uncharge_swap(entry, nr_pages);
+	swap_range_free(p, offset, nr_pages);
 }
 
 static void cluster_swap_free_nr(struct swap_info_struct *sis,
@@ -1418,7 +1413,6 @@ void swap_free_nr(swp_entry_t entry, int nr_pages)
 void put_swap_folio(struct folio *folio, swp_entry_t entry)
 {
 	unsigned long offset = swp_offset(entry);
-	unsigned long idx = offset / SWAPFILE_CLUSTER;
 	struct swap_cluster_info *ci;
 	struct swap_info_struct *si;
 	unsigned char *map;
@@ -1431,19 +1425,18 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
 		return;
 
 	ci = lock_cluster_or_swap_info(si, offset);
-	if (size == SWAPFILE_CLUSTER) {
+	if (size > 1) {
 		map = si->swap_map + offset;
-		for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+		for (i = 0; i < size; i++) {
 			val = map[i];
 			VM_BUG_ON(!(val & SWAP_HAS_CACHE));
 			if (val == SWAP_HAS_CACHE)
 				free_entries++;
 		}
-		if (free_entries == SWAPFILE_CLUSTER) {
+		if (free_entries == size) {
 			unlock_cluster_or_swap_info(si, ci);
 			spin_lock(&si->lock);
-			mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
-			swap_free_cluster(si, idx);
+			swap_entry_range_free(si, entry, size);
 			spin_unlock(&si->lock);
 			return;
 		}
@@ -1488,7 +1481,7 @@ void swapcache_free_entries(swp_entry_t *entries, int n)
 	for (i = 0; i < n; ++i) {
 		p = swap_info_get_cont(entries[i], prev);
 		if (p)
-			swap_entry_free(p, entries[i]);
+			swap_entry_range_free(p, entries[i], 1);
 		prev = p;
 	}
 	if (p)