From: Kirill Tkhai ktkhai@virtuozzo.com
mainline inclusion
from mainline-v5.2-rc1
commit 9851ac13592df77958ae7bac6ba39e71420c38ec
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
We know which LRU is not active, so move the PGDEACTIVATE accounting out of move_active_pages_to_lru() and into shrink_active_list(), where nr_deactivate is already known.
[chris@chrisdown.name: fix build on !CONFIG_MEMCG] Link: http://lkml.kernel.org/r/20190322150513.GA22021@chrisdown.name Link: http://lkml.kernel.org/r/155290128498.31489.18250485448913338607.stgit@local... Signed-off-by: Kirill Tkhai ktkhai@virtuozzo.com Signed-off-by: Chris Down chris@chrisdown.name Reviewed-by: Daniel Jordan daniel.m.jordan@oracle.com Cc: Michal Hocko mhocko@kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/memcontrol.h | 6 ++++++ mm/vmscan.c | 10 ++++------ 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f6f51b14a332..029b4848ab6f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1109,6 +1109,12 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, { }
+static inline void __count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ +} + static inline void count_memcg_page_event(struct page *page, int idx) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 38fcda38bf7b..92f2489efda8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2077,12 +2077,6 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec, } }
- if (!is_active_lru(lru)) { - __count_vm_events(PGDEACTIVATE, nr_moved); - count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, - nr_moved); - } - return nr_moved; }
@@ -2178,6 +2172,10 @@ static void shrink_active_list(unsigned long nr_to_scan,
nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); + + __count_vm_events(PGDEACTIVATE, nr_deactivate); + __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&pgdat->lru_lock);
From: Shakeel Butt shakeelb@google.com
mainline inclusion
from mainline-v5.8-rc1
commit 5d91f31faf8ebed2acfc3a1d6ac344f95c488d66
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
Many of the callbacks called by pagevec_lru_move_fn() do not correctly update the vmstats for huge pages. Fix that. Also make __pagevec_lru_add_fn() use the irq-unsafe alternative to update the stats, as irqs are already disabled there.
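For example, on x86-64 a PMD-sized THP covers 512 base pages, so moving such a page through one of these callbacks previously bumped the corresponding event counter (e.g. PGACTIVATE) by only 1; with hpage_nr_pages() it is bumped by 512, matching the number of base pages that actually changed LRU state.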
Signed-off-by: Shakeel Butt shakeelb@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Johannes Weiner hannes@cmpxchg.org Link: http://lkml.kernel.org/r/20200527182916.249910-1-shakeelb@google.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/swap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c index 320ac35bde26..a819f8746ee2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -224,7 +224,7 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, del_page_from_lru_list(page, lruvec, page_lru(page)); ClearPageActive(page); add_page_to_lru_list_tail(page, lruvec, page_lru(page)); - (*pgmoved)++; + (*pgmoved) += hpage_nr_pages(page); } }
@@ -284,7 +284,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, add_page_to_lru_list(page, lruvec, lru); trace_mm_lru_activate(page);
- __count_vm_event(PGACTIVATE); + __count_vm_events(PGACTIVATE, hpage_nr_pages(page)); update_page_reclaim_stat(lruvec, file, 1); } } @@ -499,6 +499,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, { int lru, file; bool active; + int nr_pages = hpage_nr_pages(page);
if (!PageLRU(page)) return; @@ -532,11 +533,11 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, * We moves tha page into tail of inactive. */ list_move_tail(&page->lru, &lruvec->lists[lru]); - __count_vm_event(PGROTATED); + __count_vm_events(PGROTATED, nr_pages); }
if (active) - __count_vm_event(PGDEACTIVATE); + __count_vm_events(PGDEACTIVATE, nr_pages); update_page_reclaim_stat(lruvec, file, 0); }
@@ -870,6 +871,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, { enum lru_list lru; int was_unevictable = TestClearPageUnevictable(page); + int nr_pages = hpage_nr_pages(page);
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -907,13 +909,13 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, update_page_reclaim_stat(lruvec, page_is_file_cache(page), PageActive(page)); if (was_unevictable) - count_vm_event(UNEVICTABLE_PGRESCUED); + __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { lru = LRU_UNEVICTABLE; ClearPageActive(page); SetPageUnevictable(page); if (!was_unevictable) - count_vm_event(UNEVICTABLE_PGCULLED); + __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); }
add_page_to_lru_list(page, lruvec, lru);
From: Shakeel Butt shakeelb@google.com
mainline inclusion
from mainline-v5.8-rc1
commit 21e330fc632d6a288f73de48045b782cc51d501a
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
The commit 2262185c5b28 ("mm: per-cgroup memory reclaim stats") added PGLAZYFREE, PGACTIVATE & PGDEACTIVATE stats for cgroups but missed a couple of places, and PGLAZYFREE missed huge page handling. Fix that. Also, for PGLAZYFREE, use the irq-unsafe function to update the stat, as irqs are already disabled.
Fixes: 2262185c5b28 ("mm: per-cgroup memory reclaim stats") Signed-off-by: Shakeel Butt shakeelb@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Johannes Weiner hannes@cmpxchg.org Link: http://lkml.kernel.org/r/20200527182947.251343-1-shakeelb@google.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/swap.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c index a819f8746ee2..0156aa4d772c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -277,6 +277,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int file = page_is_file_cache(page); int lru = page_lru_base_type(page); + int nr_pages = hpage_nr_pages(page);
del_page_from_lru_list(page, lruvec, lru); SetPageActive(page); @@ -284,7 +285,9 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, add_page_to_lru_list(page, lruvec, lru); trace_mm_lru_activate(page);
- __count_vm_events(PGACTIVATE, hpage_nr_pages(page)); + __count_vm_events(PGACTIVATE, nr_pages); + __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, + nr_pages); update_page_reclaim_stat(lruvec, file, 1); } } @@ -536,18 +539,21 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, __count_vm_events(PGROTATED, nr_pages); }
- if (active) + if (active) { __count_vm_events(PGDEACTIVATE, nr_pages); + __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, + nr_pages); + } update_page_reclaim_stat(lruvec, file, 0); }
- static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, void *arg) { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page) && !PageUnevictable(page)) { bool active = PageActive(page); + int nr_pages = hpage_nr_pages(page);
del_page_from_lru_list(page, lruvec, LRU_INACTIVE_ANON + active); @@ -561,8 +567,9 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, ClearPageSwapBacked(page); add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
- __count_vm_events(PGLAZYFREE, hpage_nr_pages(page)); - count_memcg_page_event(page, PGLAZYFREE); + __count_vm_events(PGLAZYFREE, nr_pages); + __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, + nr_pages); update_page_reclaim_stat(lruvec, 1, 0); } }
From: Hugh Dickins hughd@google.com
mainline inclusion
from mainline-v5.9-rc6
commit 0964730bf46b4e271c5ecad5badbbd95737c087b
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
5.8 commit 5d91f31faf8e ("mm: swap: fix vmstats for huge page") has established that vm_events should count every subpage of a THP, including unevictable_pgs_culled and unevictable_pgs_rescued; but lru_cache_add_inactive_or_unevictable() was not doing so for unevictable_pgs_mlocked, and mm/mlock.c was not doing so for unevictable_pgs mlocked, munlocked, cleared and stranded.
Fix them; but THPs don't go the pagevec way in mlock.c, so no fixes needed on that path.
Fixes: 5d91f31faf8e ("mm: swap: fix vmstats for huge page") Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Reviewed-by: Shakeel Butt shakeelb@google.com Acked-by: Yang Shi shy828301@gmail.com Cc: Alex Shi alex.shi@linux.alibaba.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@suse.com Cc: Mike Kravetz mike.kravetz@oracle.com Cc: Qian Cai cai@lca.pw Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301408230.5954@eggly.anvils Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/mlock.c | 25 +++++++++++++++---------- mm/swap.c | 6 +++--- 2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c index 8112a2200d3e..4077b1e8e199 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -58,12 +58,14 @@ EXPORT_SYMBOL(can_do_mlock); */ void clear_page_mlock(struct page *page) { + int nr_pages; + if (!TestClearPageMlocked(page)) return;
- mod_zone_page_state(page_zone(page), NR_MLOCK, - -hpage_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGCLEARED); + nr_pages = hpage_nr_pages(page); + mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); + count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages); /* * The previous TestClearPageMlocked() corresponds to the smp_mb() * in __pagevec_lru_add_fn(). @@ -77,7 +79,7 @@ void clear_page_mlock(struct page *page) * We lost the race. the page already moved to evictable list. */ if (PageUnevictable(page)) - count_vm_event(UNEVICTABLE_PGSTRANDED); + count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); } }
@@ -94,9 +96,10 @@ void mlock_vma_page(struct page *page) VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
if (!TestSetPageMlocked(page)) { - mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); + int nr_pages = hpage_nr_pages(page); + + mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); if (!isolate_lru_page(page)) putback_lru_page(page); } @@ -139,7 +142,7 @@ static void __munlock_isolated_page(struct page *page)
/* Did try_to_unlock() succeed or punt? */ if (!PageMlocked(page)) - count_vm_event(UNEVICTABLE_PGMUNLOCKED); + count_vm_events(UNEVICTABLE_PGMUNLOCKED, hpage_nr_pages(page));
putback_lru_page(page); } @@ -155,10 +158,12 @@ static void __munlock_isolated_page(struct page *page) */ static void __munlock_isolation_failed(struct page *page) { + int nr_pages = hpage_nr_pages(page); + if (PageUnevictable(page)) - __count_vm_event(UNEVICTABLE_PGSTRANDED); + __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); else - __count_vm_event(UNEVICTABLE_PGMUNLOCKED); + __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); }
/** diff --git a/mm/swap.c b/mm/swap.c index 0156aa4d772c..bdb9b294afbf 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -464,14 +464,14 @@ void lru_cache_add_active_or_unevictable(struct page *page, if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) SetPageActive(page); else if (!TestSetPageMlocked(page)) { + int nr_pages = hpage_nr_pages(page); /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte * lock is held(spinlock), which implies preemption disabled. */ - __mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); + __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); } lru_cache_add(page); }
From: Hugh Dickins hughd@google.com
mainline inclusion
from mainline-v5.9-rc6
commit 8d8869ca5d2d9d86db96271ab063fdcfa9baf5b4
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
check_move_unevictable_pages() is used in making unevictable shmem pages evictable: by shmem_unlock_mapping(), drm_gem_check_release_pagevec() and i915/gem check_release_pagevec(). Those may pass down subpages of a huge page, when /sys/kernel/mm/transparent_hugepage/shmem_enabled is "force".
That does not crash or warn at present, but the accounting of vmstats unevictable_pgs_scanned and unevictable_pgs_rescued is inconsistent: scanned being incremented on each subpage, rescued only on the head (since tails already appear evictable once the head has been updated).
5.8 commit 5d91f31faf8e ("mm: swap: fix vmstats for huge page") has established that vm_events in general (and unevictable_pgs_rescued in particular) should count every subpage: so follow that precedent here.
Do this in such a way that if mem_cgroup_page_lruvec() is made stricter (to check page->mem_cgroup is always set), no problem: skip the tails before calling it, and add thp_nr_pages() to vmstats on the head.
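As a concrete example (on x86-64, where a shmem THP has 512 subpages): passing all 512 subpages down previously incremented unevictable_pgs_scanned by 512 (once per subpage) but unevictable_pgs_rescued by only 1, for the head; with this change the tails are skipped and both counters are increased by 512 when the head is processed.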
Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Reviewed-by: Shakeel Butt shakeelb@google.com Acked-by: Yang Shi shy828301@gmail.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@suse.com Cc: Mike Kravetz mike.kravetz@oracle.com Cc: Qian Cai cai@lca.pw Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301405000.5954@eggly.anvils Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmscan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c index 92f2489efda8..4a0fd5d6c4a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4366,8 +4366,14 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) for (i = 0; i < nr_pages; i++) { struct page *page = pages[i]; struct pglist_data *pagepgdat = page_pgdat(page); + int _nr_pages; + + if (PageTransTail(page)) + continue; + + _nr_pages = hpage_nr_pages(page); + pgscanned += _nr_pages;
- pgscanned++; if (pagepgdat != pgdat) { if (pgdat) spin_unlock_irq(&pgdat->lru_lock); @@ -4386,7 +4392,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) ClearPageUnevictable(page); del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); add_page_to_lru_list(page, lruvec, lru); - pgrescued++; + pgrescued += _nr_pages; } }
From: Huang Ying ying.huang@intel.com
mainline inclusion
from mainline-v5.10-rc1
commit c4f9c701f9b44299e6adbc58d1a4bb2c40383494
category: bugfix
bugzilla: 44801
CVE: NA
-------------------------------------------------
It is reported that the following bug is triggered if the HDD is used as swap device,
[ 5758.157556] BUG: kernel NULL pointer dereference, address: 0000000000000007
[ 5758.165331] #PF: supervisor write access in kernel mode
[ 5758.171161] #PF: error_code(0x0002) - not-present page
[ 5758.176894] PGD 0 P4D 0
[ 5758.179721] Oops: 0002 [#1] SMP PTI
[ 5758.183614] CPU: 10 PID: 316 Comm: kswapd1 Kdump: loaded Tainted: G S --------- --- 5.9.0-0.rc3.1.tst.el8.x86_64 #1
[ 5758.196717] Hardware name: Intel Corporation S2600CP/S2600CP, BIOS SE5C600.86B.02.01.0002.082220131453 08/22/2013
[ 5758.208176] RIP: 0010:split_swap_cluster+0x47/0x60
[ 5758.213522] Code: c1 e3 06 48 c1 eb 0f 48 8d 1c d8 48 89 df e8 d0 20 6a 00 80 63 07 fb 48 85 db 74 16 48 89 df c6 07 00 66 66 66 90 31 c0 5b c3 <80> 24 25 07 00 00 00 fb 31 c0 5b c3 b8 f0 ff ff ff 5b c3 66 0f 1f
[ 5758.234478] RSP: 0018:ffffb147442d7af0 EFLAGS: 00010246
[ 5758.240309] RAX: 0000000000000000 RBX: 000000000014b217 RCX: ffffb14779fd9000
[ 5758.248281] RDX: 000000000014b217 RSI: ffff9c52f2ab1400 RDI: 000000000014b217
[ 5758.256246] RBP: ffffe00c51168080 R08: ffffe00c5116fe08 R09: ffff9c52fffd3000
[ 5758.264208] R10: ffffe00c511537c8 R11: ffff9c52fffd3c90 R12: 0000000000000000
[ 5758.272172] R13: ffffe00c51170000 R14: ffffe00c51170000 R15: ffffe00c51168040
[ 5758.280134] FS: 0000000000000000(0000) GS:ffff9c52f2a80000(0000) knlGS:0000000000000000
[ 5758.289163] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5758.295575] CR2: 0000000000000007 CR3: 0000000022a0e003 CR4: 00000000000606e0
[ 5758.303538] Call Trace:
[ 5758.306273]  split_huge_page_to_list+0x88b/0x950
[ 5758.311433]  deferred_split_scan+0x1ca/0x310
[ 5758.316202]  do_shrink_slab+0x12c/0x2a0
[ 5758.320491]  shrink_slab+0x20f/0x2c0
[ 5758.324482]  shrink_node+0x240/0x6c0
[ 5758.328469]  balance_pgdat+0x2d1/0x550
[ 5758.332652]  kswapd+0x201/0x3c0
[ 5758.336157]  ? finish_wait+0x80/0x80
[ 5758.340147]  ? balance_pgdat+0x550/0x550
[ 5758.344525]  kthread+0x114/0x130
[ 5758.348126]  ? kthread_park+0x80/0x80
[ 5758.352214]  ret_from_fork+0x22/0x30
[ 5758.356203] Modules linked in: fuse zram rfkill sunrpc intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp mgag200 iTCO_wdt crct10dif_pclmul iTCO_vendor_support drm_kms_helper crc32_pclmul ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops cec rapl joydev intel_cstate ipmi_si ipmi_devintf drm intel_uncore i2c_i801 ipmi_msghandler pcspkr lpc_ich mei_me i2c_smbus mei ioatdma ip_tables xfs libcrc32c sr_mod sd_mod cdrom t10_pi sg igb ahci libahci i2c_algo_bit crc32c_intel libata dca wmi dm_mirror dm_region_hash dm_log dm_mod
[ 5758.412673] CR2: 0000000000000007
[    0.000000] Linux version 5.9.0-0.rc3.1.tst.el8.x86_64 (mockbuild@x86-vm-15.build.eng.bos.redhat.com) (gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5), GNU ld version 2.30-79.el8) #1 SMP Wed Sep 9 16:03:34 EDT 2020
After further digging it's found that the following race condition exists in the original implementation,
CPU1                                     CPU2
----                                     ----
deferred_split_scan()
  split_huge_page(page) /* page isn't compound head */
    split_huge_page_to_list(page, NULL)
      __split_huge_page(page, )
        ClearPageCompound(head)
        /* unlock all subpages except page (not head) */
                                         add_to_swap(head) /* not THP */
                                           get_swap_page(head)
                                           add_to_swap_cache(head, )
                                             SetPageSwapCache(head)
        if PageSwapCache(head)
          split_swap_cluster(/* swap entry of head */)
          /* Deref sis->cluster_info: NULL accessing! */
So, in split_huge_page_to_list(), PageSwapCache() is called for the already split and unlocked "head", which may have been added to the swap cache on another CPU. As a result, split_swap_cluster() may be called wrongly.
To fix the race, the call to split_swap_cluster() is moved to __split_huge_page() before all subpages are unlocked, so that the result of PageSwapCache() is stable there.
Fixes: 59807685a7e77 ("mm, THP, swap: support splitting THP for THP swap out") Reported-by: Rafael Aquini aquini@redhat.com Signed-off-by: "Huang, Ying" ying.huang@intel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Tested-by: Rafael Aquini aquini@redhat.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Hugh Dickins hughd@google.com Cc: Andrea Arcangeli aarcange@redhat.com Cc: Matthew Wilcox willy@infradead.org Link: https://lkml.kernel.org/r/20201009073647.1531083-1-ying.huang@intel.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/huge_memory.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 705f50c6330d..c2013b3e92e7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2555,6 +2555,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
remap_page(head);
+ if (PageSwapCache(head)) { + swp_entry_t entry = { .val = page_private(head) }; + + split_swap_cluster(entry); + } + for (i = 0; i < HPAGE_PMD_NR; i++) { struct page *subpage = head + i; if (subpage == page) @@ -2792,12 +2798,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) __dec_node_page_state(page, NR_SHMEM_THPS); spin_unlock(&pgdata->split_queue_lock); __split_huge_page(page, list, end, flags); - if (PageSwapCache(head)) { - swp_entry_t entry = { .val = page_private(head) }; - - ret = split_swap_cluster(entry); - } else - ret = 0; + ret = 0; } else { if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { pr_alert("total_mapcount: %u, page_count(): %u\n",
From: Naoya Horiguchi naoya.horiguchi@nec.com
mainline inclusion
from mainline-v5.10-rc1
commit 1f2481ddbe444de5bed72f167d7180d1b2708e56
category: bugfix
bugzilla: 44803
CVE: NA
-------------------------------------------------
Soft offlining could fail with EIO due to the race condition with hugepage migration. This issue became visible due to the change in the previous patch that makes the soft offline handler take a page refcount on its own. We have no way to directly pin a zero-refcount page, and a page that appears to have a zero refcount could be allocated just after the first check.
This patch adds a second check to detect the race and gives us a chance to handle it more reliably.
Reported-by: Qian Cai cai@lca.pw Signed-off-by: Naoya Horiguchi naoya.horiguchi@nec.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Cc: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com Cc: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Cc: Aristeu Rozanski aris@ruivo.org Cc: Dave Hansen dave.hansen@intel.com Cc: David Hildenbrand david@redhat.com Cc: Dmitry Yakunin zeil@yandex-team.ru Cc: Michal Hocko mhocko@kernel.org Cc: Mike Kravetz mike.kravetz@oracle.com Cc: Oscar Salvador osalvador@suse.com Cc: Tony Luck tony.luck@intel.com Link: https://lkml.kernel.org/r/20200922135650.1634-14-osalvador@suse.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/memory-failure.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 148fdd929a19..fa091bfb5943 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1646,6 +1646,9 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) } else if (is_free_buddy_page(p)) { pr_info("%s: %#lx free buddy page\n", __func__, pfn); ret = 0; + } else if (page_count(p)) { + /* raced with allocation */ + ret = -EBUSY; } else { pr_info("%s: %#lx: unknown zero refcount page type %lx\n", __func__, pfn, p->flags); @@ -1662,6 +1665,9 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags) { int ret = __get_any_page(page, pfn, flags);
+ if (ret == -EBUSY) + ret = __get_any_page(page, pfn, flags); + if (ret == 1 && !PageHuge(page) && !PageLRU(page) && !__PageMovable(page)) { /*
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v5.10-rc1
commit e320d3012d25b1fb5f3df4edb7bd44a1c362ec10
category: bugfix
bugzilla: 43588
CVE: NA
-------------------------------------------------
Here is a very rare race which leaks memory:
Page P0 is allocated to the page cache. Page P1 is free.
Thread A                  Thread B                  Thread C
find_get_entry():
xas_load() returns P0
                          Removes P0 from page cache
                          P0 finds its buddy P1
                                                    alloc_pages(GFP_KERNEL, 1) returns P0
                                                    P0 has refcount 1
page_cache_get_speculative(P0)
P0 has refcount 2
                                                    __free_pages(P0)
                                                    P0 has refcount 1
put_page(P0)
P1 is not freed
Fix this by freeing all the pages in __free_pages() that won't be freed by the call to put_page(). It's usually not a good idea to split a page, but this is a very unlikely scenario.
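As a rough illustration of that splitting (a stand-alone userspace sketch, not the kernel code itself), the loop added to __free_pages() walks down the remaining orders and hands every sub-block except the first page back to the allocator, so the later put_page() only has to free a single page:

  #include <stdio.h>

  int main(void)
  {
          unsigned int order = 3; /* original allocation order, as in the test module */

          /* Mirrors: while (order-- > 0) free_the_page(page + (1 << order), order); */
          while (order-- > 0)
                  printf("free pages [%u..%u] as an order-%u block\n",
                         1u << order, (1u << (order + 1)) - 1, order);

          printf("page 0 stays pinned by the speculative reference and is freed later by put_page()\n");
          return 0;
  }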
Fixes: e286781d5f2e ("mm: speculative page references") Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Mike Rapoport rppt@linux.ibm.com Cc: Nick Piggin npiggin@gmail.com Cc: Hugh Dickins hughd@google.com Cc: Peter Zijlstra peterz@infradead.org Link: https://lkml.kernel.org/r/20200926213919.26642-1-willy@infradead.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Liu Shixin liushixin2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- lib/Kconfig.debug | 9 +++++++++ lib/Makefile | 1 + lib/test_free_pages.c | 42 ++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 3 +++ 4 files changed, 55 insertions(+) create mode 100644 lib/test_free_pages.c
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2d2c51cbf4bb..a6cd1e8059e0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1981,6 +1981,15 @@ config TEST_DEBUG_VIRTUAL
If unsure, say N.
+config TEST_FREE_PAGES + tristate "Test freeing pages" + help + Test that a memory leak does not occur due to a race between + freeing a block of pages and a speculative page reference. + Loading this module is safe if your kernel has the bug fixed. + If the bug is not fixed, it will leak gigabytes of memory and + probably OOM your system. + endif # RUNTIME_TESTING_MENU
config MEMTEST diff --git a/lib/Makefile b/lib/Makefile index f92a84934ab2..71443e03deb3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o +obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_free_pages.c b/lib/test_free_pages.c new file mode 100644 index 000000000000..074e76bd76b2 --- /dev/null +++ b/lib/test_free_pages.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * test_free_pages.c: Check that free_pages() doesn't leak memory + * Copyright (c) 2020 Oracle + * Author: Matthew Wilcox willy@infradead.org + */ + +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/module.h> + +static void test_free_pages(gfp_t gfp) +{ + unsigned int i; + + for (i = 0; i < 1000 * 1000; i++) { + unsigned long addr = __get_free_pages(gfp, 3); + struct page *page = virt_to_page(addr); + + /* Simulate page cache getting a speculative reference */ + get_page(page); + free_pages(addr, 3); + put_page(page); + } +} + +static int m_in(void) +{ + test_free_pages(GFP_KERNEL); + test_free_pages(GFP_KERNEL | __GFP_COMP); + + return 0; +} + +static void m_ex(void) +{ +} + +module_init(m_in); +module_exit(m_ex); +MODULE_AUTHOR("Matthew Wilcox willy@infradead.org"); +MODULE_LICENSE("GPL"); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 45fda5c01141..67768c56d412 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4585,6 +4585,9 @@ void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) free_the_page(page, order); + else if (!PageHead(page)) + while (order-- > 0) + free_the_page(page + (1 << order), order); } EXPORT_SYMBOL(__free_pages);
From: Liu Shixin liushixin2@huawei.com
hulk inclusion
category: feature
bugzilla: 43588
CVE: NA
-------------------------------------------------
Set the default value of CONFIG_TEST_FREE_PAGES (not set) in the defconfigs.
Signed-off-by: Liu Shixin liushixin2@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/configs/euleros_defconfig | 1 + arch/arm64/configs/hulk_defconfig | 1 + arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/hulk_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 5 files changed, 5 insertions(+)
diff --git a/arch/arm64/configs/euleros_defconfig b/arch/arm64/configs/euleros_defconfig index 4d0a652d02e7..53af3aa69638 100644 --- a/arch/arm64/configs/euleros_defconfig +++ b/arch/arm64/configs/euleros_defconfig @@ -5652,6 +5652,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_UDELAY is not set # CONFIG_TEST_STATIC_KEYS is not set # CONFIG_TEST_KMOD is not set +# CONFIG_TEST_FREE_PAGES is not set # CONFIG_MEMTEST is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_SAMPLES is not set diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig index eee186ec38c4..3b2b957f9ae0 100644 --- a/arch/arm64/configs/hulk_defconfig +++ b/arch/arm64/configs/hulk_defconfig @@ -5670,6 +5670,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_UDELAY is not set # CONFIG_TEST_STATIC_KEYS is not set # CONFIG_TEST_KMOD is not set +# CONFIG_TEST_FREE_PAGES is not set # CONFIG_MEMTEST is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_SAMPLES is not set diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index ffbd0fba2adc..c28ca5f782cf 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6013,6 +6013,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_UDELAY is not set # CONFIG_TEST_STATIC_KEYS is not set # CONFIG_TEST_KMOD is not set +# CONFIG_TEST_FREE_PAGES is not set # CONFIG_MEMTEST is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_SAMPLES is not set diff --git a/arch/x86/configs/hulk_defconfig b/arch/x86/configs/hulk_defconfig index 2820af8d5b9a..eabaf356664d 100644 --- a/arch/x86/configs/hulk_defconfig +++ b/arch/x86/configs/hulk_defconfig @@ -7479,6 +7479,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_UDELAY is not set # CONFIG_TEST_STATIC_KEYS is not set # CONFIG_TEST_KMOD is not set +# CONFIG_TEST_FREE_PAGES is not set # CONFIG_MEMTEST is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_SAMPLES is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index b1f2e0cafe04..4b7502fb79e7 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -7487,6 +7487,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_UDELAY is not set # CONFIG_TEST_STATIC_KEYS is not set # CONFIG_TEST_KMOD is not set +# CONFIG_TEST_FREE_PAGES is not set # CONFIG_MEMTEST is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_SAMPLES is not set
From: Stefano Garzarella sgarzare@redhat.com
mainline inclusion
from mainline-v5.4-rc1
commit 473c7391ce731adb482c03e420964676ed8b494d
category: bugfix
bugzilla: NA
CVE: NA
--------------------------------
Since virtio-vsock was introduced, the buffers filled by the host and pushed to the guest using the vring are queued directly in a per-socket list. These buffers are preallocated by the guest with a fixed size (4 KB).
The maximum amount of memory used by each socket should be controlled by the credit mechanism. The default credit available per socket is 256 KB, but if we use only 1 byte per packet, the guest can queue up to 262144 4 KB buffers, using up to 1 GB of memory per socket. In addition, the guest will continue to fill the vring with new 4 KB free buffers to avoid starvation of other sockets.
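As a back-of-the-envelope check, the sketch below (a stand-alone userspace program, not part of the patch) reproduces the worst-case arithmetic for the default 256 KB credit and 4 KB preallocated buffers:

  #include <stdio.h>

  int main(void)
  {
          const unsigned long credit   = 256UL * 1024; /* default per-socket credit */
          const unsigned long buf_size = 4UL * 1024;   /* preallocated rx buffer size */
          const unsigned long payload  = 1;            /* worst case: 1-byte packets */

          unsigned long pkts  = credit / payload;      /* packets before the credit runs out */
          unsigned long bytes = pkts * buf_size;       /* guest memory pinned by one socket */

          printf("%lu packets -> %lu MiB pinned\n", pkts, bytes >> 20);
          return 0;
  }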
This patch mitigates the issue by copying the payload of small packets (< 128 bytes) into the buffer of the last packet queued, in order to avoid wasting memory.
Signed-off-by: Stefano Garzarella sgarzare@redhat.com Reviewed-by: Stefan Hajnoczi stefanha@redhat.com Acked-by: Michael S. Tsirkin mst@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Li Xianming lixianming5@huawei.com Reviewed-by: Su Bo subo7@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/vhost/vsock.c | 2 + include/linux/virtio_vsock.h | 1 + net/vmw_vsock/virtio_transport.c | 1 + net/vmw_vsock/virtio_transport_common.c | 60 +++++++++++++++++++++---- 4 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6ee320259e4f..294b6a2eba64 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -360,6 +360,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, return NULL; }
+ pkt->buf_len = pkt->len; + nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); if (nbytes != pkt->len) { vq_err(vq, "Expected %u byte payload, got %zu bytes\n", diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 8b8d13f01cae..cb2f9232f965 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -52,6 +52,7 @@ struct virtio_vsock_pkt { /* socket refcnt not held, only use for cancellation */ struct vsock_sock *vsk; void *buf; + u32 buf_len; u32 len; u32 off; bool reply; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index cc70d651d13e..1f57a2b40c92 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -281,6 +281,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) break; }
+ pkt->buf_len = buf_len; pkt->len = buf_len;
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 5f8a72d34d31..a1c276498723 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -27,6 +27,9 @@ /* How long to wait for graceful shutdown of a connection */ #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
+/* Threshold for detecting small packets to copy */ +#define GOOD_COPY_LEN 128 + static const struct virtio_transport *virtio_transport_get_ops(void) { const struct vsock_transport *t = vsock_core_get_transport(); @@ -65,6 +68,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->buf = kmalloc(len, GFP_KERNEL); if (!pkt->buf) goto out_pkt; + + pkt->buf_len = len; + err = memcpy_from_msg(pkt->buf, info->msg, len); if (err) goto out; @@ -850,24 +856,60 @@ virtio_transport_recv_connecting(struct sock *sk, return err; }
+static void +virtio_transport_recv_enqueue(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + bool free_pkt = false; + + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + + spin_lock_bh(&vvs->rx_lock); + + virtio_transport_inc_rx_pkt(vvs, pkt); + + /* Try to copy small packets into the buffer of last packet queued, + * to avoid wasting memory queueing the entire buffer with a small + * payload. + */ + if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { + struct virtio_vsock_pkt *last_pkt; + + last_pkt = list_last_entry(&vvs->rx_queue, + struct virtio_vsock_pkt, list); + + /* If there is space in the last packet queued, we copy the + * new packet in its buffer. + */ + if (pkt->len <= last_pkt->buf_len - last_pkt->len) { + memcpy(last_pkt->buf + last_pkt->len, pkt->buf, + pkt->len); + last_pkt->len += pkt->len; + free_pkt = true; + goto out; + } + } + + list_add_tail(&pkt->list, &vvs->rx_queue); + +out: + spin_unlock_bh(&vvs->rx_lock); + if (free_pkt) + virtio_transport_free_pkt(pkt); +} + static int virtio_transport_recv_connected(struct sock *sk, struct virtio_vsock_pkt *pkt) { struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_vsock_sock *vvs = vsk->trans; int err = 0;
switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - - spin_lock_bh(&vvs->rx_lock); - virtio_transport_inc_rx_pkt(vvs, pkt); - list_add_tail(&pkt->list, &vvs->rx_queue); - spin_unlock_bh(&vvs->rx_lock); - + virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
From: Jia He justin.he@arm.com
mainline inclusion
from mainline-v5.7
commit 8692cefc433f282228fd44938dd4d26ed38254a2
category: bugfix
bugzilla: NA
CVE: NA
--------------------------------
When a client on the host tries to connect(SOCK_STREAM, O_NONBLOCK) to a server on the guest, there will be a panic on a ThunderX2 (armv8a server):
[ 463.718844] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[ 463.718848] Mem abort info:
[ 463.718849]   ESR = 0x96000044
[ 463.718852]   EC = 0x25: DABT (current EL), IL = 32 bits
[ 463.718853]   SET = 0, FnV = 0
[ 463.718854]   EA = 0, S1PTW = 0
[ 463.718855] Data abort info:
[ 463.718856]   ISV = 0, ISS = 0x00000044
[ 463.718857]   CM = 0, WnR = 1
[ 463.718859] user pgtable: 4k pages, 48-bit VAs, pgdp=0000008f6f6e9000
[ 463.718861] [0000000000000000] pgd=0000000000000000
[ 463.718866] Internal error: Oops: 96000044 [#1] SMP
[...]
[ 463.718977] CPU: 213 PID: 5040 Comm: vhost-5032 Tainted: G O 5.7.0-rc7+ #139
[ 463.718980] Hardware name: GIGABYTE R281-T91-00/MT91-FS1-00, BIOS F06 09/25/2018
[ 463.718982] pstate: 60400009 (nZCv daif +PAN -UAO)
[ 463.718995] pc : virtio_transport_recv_pkt+0x4c8/0xd40 [vmw_vsock_virtio_transport_common]
[ 463.718999] lr : virtio_transport_recv_pkt+0x1fc/0xd40 [vmw_vsock_virtio_transport_common]
[ 463.719000] sp : ffff80002dbe3c40
[...]
[ 463.719025] Call trace:
[ 463.719030]  virtio_transport_recv_pkt+0x4c8/0xd40 [vmw_vsock_virtio_transport_common]
[ 463.719034]  vhost_vsock_handle_tx_kick+0x360/0x408 [vhost_vsock]
[ 463.719041]  vhost_worker+0x100/0x1a0 [vhost]
[ 463.719048]  kthread+0x128/0x130
[ 463.719052]  ret_from_fork+0x10/0x18
The race condition is as follows:

Task1                                Task2
=====                                =====
__sock_release                       virtio_transport_recv_pkt
  __vsock_release                      vsock_find_bound_socket (found sk)
    lock_sock_nested
    vsock_remove_sock
    sock_orphan
      sk_set_socket(sk, NULL)
    sk->sk_shutdown = SHUTDOWN_MASK
    ...
    release_sock                       lock_sock
                                         virtio_transport_recv_connecting
                                           sk->sk_socket->state (panic!)
The root cause is that vsock_find_bound_socket() cannot hold lock_sock, so there is a small race window between vsock_find_bound_socket() and lock_sock(). If __vsock_release() is running in another task, sk->sk_socket will be set to NULL inadvertently.
This fixes it by checking sk->sk_shutdown (suggested by Stefano) after lock_sock(), since sk->sk_shutdown is set to SHUTDOWN_MASK under the protection of lock_sock_nested().
Signed-off-by: Jia He justin.he@arm.com Reviewed-by: Stefano Garzarella sgarzare@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Li Xianming lixianming5@huawei.com Reviewed-by: Su Bo subo7@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/vmw_vsock/virtio_transport_common.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a1c276498723..4ae824ea3f28 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1079,6 +1079,14 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
lock_sock(sk);
+ /* Check if sk has been released before lock_sock */ + if (sk->sk_shutdown == SHUTDOWN_MASK) { + (void)virtio_transport_reset_no_sock(t, pkt); + release_sock(sk); + sock_put(sk); + goto free_pkt; + } + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid;