From: Yu Kuai yukuai3@huawei.com
hulk inclusion
category: bugfix
bugzilla: 46357
CVE: NA
---------------------------
test procedures:
a. create 20000 cgroups, and echo "8:0 10000" to blkio.throttle.write_bps_device for each of them (a minimal reproduction script is sketched below)
b. echo 1 > /sys/block/sda/device/delete
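A minimal sketch of the reproduction, assuming a typical cgroup v1 setup (the /sys/fs/cgroup/blkio mount point, the tg$i directory names, and the 8:0 device number for sda are illustrative assumptions, not part of the original report):

  #!/bin/sh
  # Create 20000 blkio cgroups, each with a write bps limit for device 8:0 (sda).
  for i in $(seq 1 20000); do
          mkdir "/sys/fs/cgroup/blkio/tg$i"
          echo "8:0 10000" > "/sys/fs/cgroup/blkio/tg$i/blkio.throttle.write_bps_device"
  done
  # Deleting the device tears down every blkg while q->queue_lock is held.
  echo 1 > /sys/block/sda/device/delete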
test result:
rcu: INFO: rcu_sched detected stalls on CPUs/tasks: [5/1143]
rcu:     0-...0: (0 ticks this GP) idle=0f2/1/0x4000000000000000 softirq=15507/15507 fq
rcu:     (detected by 6, t=60012 jiffies, g=119977, q=27153)
NMI backtrace for cpu 0
CPU: 0 PID: 443 Comm: bash Not tainted 4.19.95-00061-g0bcc83b30eec #63
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-p4
RIP: 0010:blk_throtl_update_limit_valid.isra.0+0x116/0x2a0
Code: 01 00 00 e8 7c dd 74 ff 48 83 bb 78 01 00 00 00 0f 85 54 01 00 00 48 8d bb 88 01 1
RSP: 0018:ffff8881030bf9f0 EFLAGS: 00000046
RAX: 0000000000000000 RBX: ffff8880b4f37080 RCX: ffffffff95da0afe
RDX: dffffc0000000000 RSI: ffff888100373980 RDI: ffff8880b4f37208
RBP: ffff888100deca00 R08: ffffffff9528f951 R09: 0000000000000001
R10: ffffed10159dbf56 R11: ffff8880acedfab3 R12: ffff8880b9fda498
R13: ffff8880b9fda4f4 R14: 0000000000000050 R15: ffffffff98b622c0
FS:  00007feb51c51700(0000) GS:ffff888106200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000561619547080 CR3: 0000000102bc9000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 throtl_pd_offline+0x98/0x100
 blkg_destroy+0x133/0x630
 ? blkcg_deactivate_policy+0x2c0/0x2c0
 ? lock_timer_base+0x65/0x110
 blkg_destroy_all+0x7f/0x100
 blkcg_exit_queue+0x3f/0xa5
 blk_exit_queue+0x69/0xa0
 blk_cleanup_queue+0x226/0x360
 __scsi_remove_device+0xb4/0x3c0
 scsi_remove_device+0x38/0x60
 sdev_store_delete+0x74/0x100
 ? dev_driver_string+0xb0/0xb0
 dev_attr_store+0x41/0x70
 sysfs_kf_write+0x89/0xc0
 kernfs_fop_write+0x1b6/0x2e0
 ? sysfs_kf_bin_read+0x130/0x130
 __vfs_write+0xca/0x420
 ? kernel_read+0xc0/0xc0
 ? __alloc_fd+0x16f/0x2d0
 ? __fd_install+0x95/0x1a0
 ? handle_mm_fault+0x3e0/0x560
 vfs_write+0x11a/0x2f0
 ksys_write+0xb9/0x1e0
 ? __x64_sys_read+0x60/0x60
 ? kasan_check_write+0x20/0x30
 ? filp_close+0xb5/0xf0
 __x64_sys_write+0x46/0x60
 do_syscall_64+0xd9/0x1f0
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Such a large number of blkgs is very rare in practice, but the problem does exist in theory. To avoid these stall warnings, release 'q->queue_lock' for a while after each batch of blkgs is destroyed.
Signed-off-by: Yu Kuai yukuai3@huawei.com
Reviewed-by: Tao Hou houtao1@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Signed-off-by: Cheng Jian cj.chengjian@huawei.com
---
 block/blk-cgroup.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e592167449aa..c64f0afa27dc 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -364,16 +364,31 @@ static void blkg_destroy(struct blkcg_gq *blkg)
  */
 static void blkg_destroy_all(struct request_queue *q)
 {
+#define BLKG_DESTROY_BATCH 4096
 	struct blkcg_gq *blkg, *n;
+	int count;
 
 	lockdep_assert_held(q->queue_lock);
 
+again:
+	count = BLKG_DESTROY_BATCH;
 	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
 		blkg_destroy(blkg);
 		spin_unlock(&blkcg->lock);
+		/*
+		 * If the list is too long, the loop can take a long time,
+		 * thus release the lock for a while when a batch of blkgs
+		 * has been destroyed.
+		 */
+		if (!--count) {
+			spin_unlock_irq(q->queue_lock);
+			cond_resched();
+			spin_lock_irq(q->queue_lock);
+			goto again;
+		}
 	}
 
 	q->root_blkg = NULL;
From: Yu Kuai yukuai3@huawei.com
hulk inclusion
category: bugfix
bugzilla: 46357
CVE: NA
---------------------------
blk_throtl_update_limit_valid() searches all descendants to see whether any of them has a nonzero 'LIMIT_LOW' bps/iops limit for READ or WRITE. However, these limits are always zero if CONFIG_BLK_DEV_THROTTLING_LOW is not set, so a lot of time is wasted iterating over the descendants only to find nothing.

Thus, do nothing in blk_throtl_update_limit_valid() in that case.
Signed-off-by: Yu Kuai yukuai3@huawei.com
Reviewed-by: Tao Hou houtao1@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Signed-off-by: Cheng Jian cj.chengjian@huawei.com
---
 block/blk-throttle.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index caee658609d7..c7b4f905feb5 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -568,6 +568,7 @@ static void throtl_pd_online(struct blkg_policy_data *pd)
 	tg_update_has_rules(tg);
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static void blk_throtl_update_limit_valid(struct throtl_data *td)
 {
 	struct cgroup_subsys_state *pos_css;
@@ -588,6 +589,11 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
 
 	td->limit_valid[LIMIT_LOW] = low_valid;
 }
+#else
+static inline void blk_throtl_update_limit_valid(struct throtl_data *td)
+{
+}
+#endif
 
 static void throtl_upgrade_state(struct throtl_data *td);
 
 static void throtl_pd_offline(struct blkg_policy_data *pd)
From: Kirill Tkhai ktkhai@virtuozzo.com
mainline inclusion
from mainline-v5.2-rc1
commit 9851ac13592df77958ae7bac6ba39e71420c38ec
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
We know which LRU is not active, so move the PGDEACTIVATE accounting out of move_active_pages_to_lru() and into shrink_active_list(), where the number of deactivated pages is already known.
[chris@chrisdown.name: fix build on !CONFIG_MEMCG]
Link: http://lkml.kernel.org/r/20190322150513.GA22021@chrisdown.name
Link: http://lkml.kernel.org/r/155290128498.31489.18250485448913338607.stgit@local...
Signed-off-by: Kirill Tkhai ktkhai@virtuozzo.com
Signed-off-by: Chris Down chris@chrisdown.name
Reviewed-by: Daniel Jordan daniel.m.jordan@oracle.com
Cc: Michal Hocko mhocko@kernel.org
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Signed-off-by: Liu Shixin liushixin2@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Signed-off-by: Cheng Jian cj.chengjian@huawei.com
---
 include/linux/memcontrol.h |  6 ++++++
 mm/vmscan.c                | 10 ++++------
 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 24068566ada2..0069cd618b1e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1100,6 +1100,12 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
 {
 }
 
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+					enum vm_event_item idx,
+					unsigned long count)
+{
+}
+
 static inline void count_memcg_page_event(struct page *page,
 					  int idx)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ac6f4cbef5ea..01abd3f43a02 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2076,12 +2076,6 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
 		}
 	}
 
-	if (!is_active_lru(lru)) {
-		__count_vm_events(PGDEACTIVATE, nr_moved);
-		count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
-				   nr_moved);
-	}
-
 	return nr_moved;
 }
 
@@ -2176,6 +2170,10 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
 	nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
+
+	__count_vm_events(PGDEACTIVATE, nr_deactivate);
+	__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
+
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&pgdat->lru_lock);
From: Shakeel Butt shakeelb@google.com
mainline inclusion
from mainline-v5.8-rc1
commit 5d91f31faf8ebed2acfc3a1d6ac344f95c488d66
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
Many of the callbacks called by pagevec_lru_move_fn() do not correctly update the vmstats for huge pages: they bump the counters by one instead of by the number of base pages. Fix that. Also make __pagevec_lru_add_fn() use the irq-unsafe alternative to update the stats, as irqs are already disabled there.
Signed-off-by: Shakeel Butt shakeelb@google.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Acked-by: Johannes Weiner hannes@cmpxchg.org
Link: http://lkml.kernel.org/r/20200527182916.249910-1-shakeelb@google.com
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Signed-off-by: Liu Shixin liushixin2@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Signed-off-by: Cheng Jian cj.chengjian@huawei.com
---
 mm/swap.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c
index 320ac35bde26..a819f8746ee2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -224,7 +224,7 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
 		del_page_from_lru_list(page, lruvec, page_lru(page));
 		ClearPageActive(page);
 		add_page_to_lru_list_tail(page, lruvec, page_lru(page));
-		(*pgmoved)++;
+		(*pgmoved) += hpage_nr_pages(page);
 	}
 }
 
@@ -284,7 +284,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
 		add_page_to_lru_list(page, lruvec, lru);
 		trace_mm_lru_activate(page);
 
-		__count_vm_event(PGACTIVATE);
+		__count_vm_events(PGACTIVATE, hpage_nr_pages(page));
 		update_page_reclaim_stat(lruvec, file, 1);
 	}
 }
@@ -499,6 +499,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
 {
 	int lru, file;
 	bool active;
+	int nr_pages = hpage_nr_pages(page);
 
 	if (!PageLRU(page))
 		return;
@@ -532,11 +533,11 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
 		 * We moves tha page into tail of inactive.
 		 */
 		list_move_tail(&page->lru, &lruvec->lists[lru]);
-		__count_vm_event(PGROTATED);
+		__count_vm_events(PGROTATED, nr_pages);
 	}
 
 	if (active)
-		__count_vm_event(PGDEACTIVATE);
+		__count_vm_events(PGDEACTIVATE, nr_pages);
 	update_page_reclaim_stat(lruvec, file, 0);
 }
 
@@ -870,6 +871,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
 {
 	enum lru_list lru;
 	int was_unevictable = TestClearPageUnevictable(page);
+	int nr_pages = hpage_nr_pages(page);
 
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
@@ -907,13 +909,13 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
 		update_page_reclaim_stat(lruvec, page_is_file_cache(page),
 					 PageActive(page));
 		if (was_unevictable)
-			count_vm_event(UNEVICTABLE_PGRESCUED);
+			__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
 	} else {
 		lru = LRU_UNEVICTABLE;
 		ClearPageActive(page);
 		SetPageUnevictable(page);
 		if (!was_unevictable)
-			count_vm_event(UNEVICTABLE_PGCULLED);
+			__count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
 	}
 
 	add_page_to_lru_list(page, lruvec, lru);
From: Shakeel Butt shakeelb@google.com
mainline inclusion
from mainline-v5.8-rc1
commit 21e330fc632d6a288f73de48045b782cc51d501a
category: bugfix
bugzilla: 36232
CVE: NA
-------------------------------------------------
Commit 2262185c5b28 ("mm: per-cgroup memory reclaim stats") added PGLAZYFREE, PGACTIVATE and PGDEACTIVATE stats for cgroups but missed a couple of places, and PGLAZYFREE missed huge page handling. Fix that. Also, for PGLAZYFREE, use the irq-unsafe function to update the stat, as irqs are already disabled.
Fixes: 2262185c5b28 ("mm: per-cgroup memory reclaim stats")
Signed-off-by: Shakeel Butt shakeelb@google.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Acked-by: Johannes Weiner hannes@cmpxchg.org
Link: http://lkml.kernel.org/r/20200527182947.251343-1-shakeelb@google.com
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Signed-off-by: Liu Shixin liushixin2@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Signed-off-by: Cheng Jian cj.chengjian@huawei.com
---
 mm/swap.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c
index a819f8746ee2..0156aa4d772c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -277,6 +277,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int file = page_is_file_cache(page);
 		int lru = page_lru_base_type(page);
+		int nr_pages = hpage_nr_pages(page);
 
 		del_page_from_lru_list(page, lruvec, lru);
 		SetPageActive(page);
@@ -284,7 +285,9 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
 		add_page_to_lru_list(page, lruvec, lru);
 		trace_mm_lru_activate(page);
 
-		__count_vm_events(PGACTIVATE, hpage_nr_pages(page));
+		__count_vm_events(PGACTIVATE, nr_pages);
+		__count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE,
+				     nr_pages);
 		update_page_reclaim_stat(lruvec, file, 1);
 	}
 }
@@ -536,18 +539,21 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
 		__count_vm_events(PGROTATED, nr_pages);
 	}
 
-	if (active)
+	if (active) {
 		__count_vm_events(PGDEACTIVATE, nr_pages);
+		__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
+				     nr_pages);
+	}
 	update_page_reclaim_stat(lruvec, file, 0);
 }
 
-
 static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
 			    void *arg)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		bool active = PageActive(page);
+		int nr_pages = hpage_nr_pages(page);
 
 		del_page_from_lru_list(page, lruvec,
 				       LRU_INACTIVE_ANON + active);
@@ -561,8 +567,9 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
 		ClearPageSwapBacked(page);
 		add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
 
-		__count_vm_events(PGLAZYFREE, hpage_nr_pages(page));
-		count_memcg_page_event(page, PGLAZYFREE);
+		__count_vm_events(PGLAZYFREE, nr_pages);
+		__count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE,
+				     nr_pages);
 		update_page_reclaim_stat(lruvec, 1, 0);
 	}
 }