
From: Barry Song <v-songbaohua@oppo.com> mainline inclusion from mainline-v6.12-rc6 commit 01626a18230246efdcea322aa8f067e60ffe5ccd category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB7294 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Commit 13ddaf26be32 ("mm/swap: fix race when skipping swapcache") introduced an unconditional one-tick sleep when `swapcache_prepare()` fails, which has led to reports of UI stuttering on latency-sensitive Android devices. To address this, we can use a waitqueue to wake up tasks that fail `swapcache_prepare()` sooner, instead of always sleeping for a full tick. While tasks may occasionally be woken by an unrelated `do_swap_page()`, this method is preferable to two scenarios: rapid re-entry into page faults, which can cause livelocks, and multiple millisecond sleeps, which visibly degrade user experience. Oven's testing shows that a single waitqueue resolves the UI stuttering issue. If a 'thundering herd' problem becomes apparent later, a waitqueue hash similar to `folio_wait_table[PAGE_WAIT_TABLE_SIZE]` for page bit locks can be introduced. [v-songbaohua@oppo.com: wake_up only when swapcache_wq waitqueue is active] Link: https://lkml.kernel.org/r/20241008130807.40833-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240926211936.75373-1-21cnbao@gmail.com Fixes: 13ddaf26be32 ("mm/swap: fix race when skipping swapcache") Signed-off-by: Barry Song <v-songbaohua@oppo.com> Reported-by: Oven Liyang <liyangouwen1@oppo.com> Tested-by: Oven Liyang <liyangouwen1@oppo.com> Cc: Kairui Song <kasong@tencent.com> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Yu Zhao <yuzhao@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Chris Li <chrisl@kernel.org> Cc: Hugh Dickins <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Yosry Ahmed <yosryahmed@google.com> Cc: SeongJae Park <sj@kernel.org> Cc: Kalesh Singh <kaleshsingh@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Conflicts: mm/memory.c [Context conflicts.] Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com> --- mm/memory.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 20869d0cd5a1..f580b9c54247 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3397,6 +3397,8 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); +static DECLARE_WAIT_QUEUE_HEAD(swapcache_wq); + /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. @@ -3408,6 +3410,7 @@ EXPORT_SYMBOL(unmap_mapping_range); vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; + DECLARE_WAITQUEUE(wait, current); struct page *page = NULL, *swapcache; struct swap_info_struct *si = NULL; bool need_clear_cache = false; @@ -3475,7 +3478,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) */ if (swapcache_prepare(entry)) { /* Relax a bit to prevent rapid repeated page faults */ + add_wait_queue(&swapcache_wq, &wait); schedule_timeout_uninterruptible(1); + remove_wait_queue(&swapcache_wq, &wait); goto out; } need_clear_cache = true; @@ -3650,8 +3655,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); out: /* Clear the swap cache pin for direct swapin after PTL unlock */ - if (need_clear_cache) + if (need_clear_cache) { swapcache_clear(si, entry); + if (waitqueue_active(&swapcache_wq)) + wake_up(&swapcache_wq); + } if (si) put_swap_device(si); return ret; @@ -3665,8 +3673,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(swapcache); put_page(swapcache); } - if (need_clear_cache) + if (need_clear_cache) { swapcache_clear(si, entry); + if (waitqueue_active(&swapcache_wq)) + wake_up(&swapcache_wq); + } if (si) put_swap_device(si); return ret; -- 2.34.1