From: Chengming Zhou <zhouchengming@bytedance.com>
mainline inclusion
from mainline-v6.9-rc1
commit 0827a1fb143fae588cb6f5b9a97c405d6c2ddec9
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IBC5I1
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
During testing I found that zswap_writeback_entry() sometimes returns -ENOMEM, which is not what we expected:
bpftrace -e 'kr:zswap_writeback_entry {@[(int32)retval]=count()}'
@[-12]: 1563
@[0]: 277221
The reason is that __read_swap_cache_async() returns NULL because swapcache_prepare() fails: we don't invalidate the zswap entry when the swap entry is freed to the per-cpu pool, so these zswap entries remain on the zswap tree and LRU list.
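For illustration, a minimal sketch of the failing path, simplified from mm/zswap.c (not the literal upstream code; the exact __read_swap_cache_async() arguments vary between kernel versions):

	/*
	 * Sketch only: the writeback path tries to bring the compressed page
	 * back into the swap cache.  If the swap entry was already freed,
	 * swapcache_prepare() inside __read_swap_cache_async() cannot pin the
	 * entry with SWAP_HAS_CACHE, no page comes back, and the failure is
	 * reported as -ENOMEM (the -12 seen in the histogram above).
	 */
	static int writeback_sketch(swp_entry_t swpentry)
	{
		bool page_was_allocated;
		struct page *page;

		page = __read_swap_cache_async(swpentry, GFP_KERNEL, NULL, 0,
					       &page_was_allocated);
		if (!page)
			return -ENOMEM;

		/* ... otherwise decompress the zswap entry into the page ... */
		return 0;
	}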
This patch moves the invalidation ahead, to the point where the swap entry is freed to the per-cpu pool, since there is no benefit to leaving stale zswap entries on the tree and LRU list.
With this patch:

bpftrace -e 'kr:zswap_writeback_entry {@[(int32)retval]=count()}'
@[0]: 259744
Note: a large folio can't have a zswap entry for now, so we don't bother to add zswap entry invalidation in the large folio swap free path.
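For readers of the interface change below: callers now hand zswap the whole swp_entry_t and zswap splits it back into type and offset itself. A rough illustration (not part of this patch) using the swp_entry()/swp_type()/swp_offset() helpers from <linux/swapops.h>, with a hypothetical caller name:

	#include <linux/swapops.h>
	#include <linux/zswap.h>

	/* Illustration only: mapping an old (type, offset) pair onto the new API. */
	static void invalidate_one_slot(int type, pgoff_t offset)
	{
		swp_entry_t entry = swp_entry(type, offset);

		/* zswap recovers swp_type(entry) and swp_offset(entry) internally. */
		zswap_invalidate(entry);
	}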
Link: https://lkml.kernel.org/r/20240201-b4-zswap-invalidate-entry-v2-2-99d4084260...
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Conflicts:
	include/linux/zswap.h
	mm/zswap.c
[ Context conflict. ]
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/zswap.h | 4 ++--
 mm/swap_slots.c       | 4 ++++
 mm/swapfile.c         | 1 -
 mm/zswap.c            | 5 +++--
 4 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 2a60ce39cfde..a13d2d2d9131 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -12,7 +12,7 @@ extern atomic_t zswap_stored_pages;
 
 bool zswap_store(struct folio *folio);
 bool zswap_load(struct folio *folio);
-void zswap_invalidate(int type, pgoff_t offset);
+void zswap_invalidate(swp_entry_t swp);
 void zswap_swapon(int type);
 void zswap_swapoff(int type);
 
@@ -28,7 +28,7 @@ static inline bool zswap_load(struct folio *folio)
 	return false;
 }
 
-static inline void zswap_invalidate(int type, pgoff_t offset) {}
+static inline void zswap_invalidate(swp_entry_t swp) {}
 static inline void zswap_swapon(int type) {}
 static inline void zswap_swapoff(int type) {}
 
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 7af3b93d4c8c..5579eed7065f 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
 #include <linux/mm.h>
+#include <linux/zswap.h>
 
 static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
 #ifdef CONFIG_MEMCG_SWAP_QOS
@@ -394,6 +395,9 @@ void free_swap_slot(swp_entry_t entry)
 {
 	struct swap_slots_cache *cache;
 
+	/* Large folio swap slot is not covered. */
+	zswap_invalidate(entry);
+
 	cache = raw_cpu_ptr(&swp_slots);
 	if (likely(use_swap_slot_cache && cache->slots_ret)) {
 		spin_lock_irq(&cache->free_lock);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 3af5b6ebb241..30832b85d6c2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -765,7 +765,6 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
 		swap_slot_free_notify = NULL;
 	while (offset <= end) {
 		arch_swap_invalidate_page(si->type, offset);
-		zswap_invalidate(si->type, offset);
 		if (swap_slot_free_notify)
 			swap_slot_free_notify(si->bdev, offset);
 		offset++;
diff --git a/mm/zswap.c b/mm/zswap.c
index 69681b9173fd..5acda5b906bc 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1482,9 +1482,10 @@ bool zswap_load(struct folio *folio)
 	return ret;
 }
 
-void zswap_invalidate(int type, pgoff_t offset)
+void zswap_invalidate(swp_entry_t swp)
 {
-	struct zswap_tree *tree = zswap_trees[type];
+	pgoff_t offset = swp_offset(swp);
+	struct zswap_tree *tree = zswap_trees[swp_type(swp)];
 	struct zswap_entry *entry;
 
 	/* find */