hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8QUNW
-------------------------------
Introduce more fine-grained memory stall tracking in pressure.stat, such as global memory reclaim, memory compaction, async cgroup memory reclaim, and swap.
Signed-off-by: Lu Jialin lujialin4@huawei.com --- block/blk-cgroup.c | 2 +- include/linux/psi_types.h | 20 ++++++++++++++++++ kernel/sched/psi.c | 44 +++++++++++++++++++++++++++++++++++++++ mm/compaction.c | 2 +- mm/filemap.c | 6 +++--- mm/memcontrol.c | 3 +++ mm/page_alloc.c | 6 ++++++ mm/page_io.c | 3 +++ mm/readahead.c | 12 ++++++++++- mm/vmscan.c | 5 ++++- 10 files changed, 96 insertions(+), 7 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4a42ea2972ad..eb5bc214a966 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1831,7 +1831,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) */ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) { - unsigned long pflags; + unsigned long pflags = 0; bool clamp; u64 now = ktime_to_ns(ktime_get()); u64 exp; diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 984aabee2c35..d20a83184fd0 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -212,12 +212,28 @@ struct psi_group { enum psi_stat_states { PSI_MEMCG_RECLAIM_SOME, PSI_MEMCG_RECLAIM_FULL, + PSI_GLOBAL_RECLAIM_SOME, + PSI_GLOBAL_RECLAIM_FULL, + PSI_COMPACT_SOME, + PSI_COMPACT_FULL, + PSI_ASYNC_MEMCG_RECLAIM_SOME, + PSI_ASYNC_MEMCG_RECLAIM_FULL, + PSI_SWAP_SOME, + PSI_SWAP_FULL, NR_PSI_STAT_STATES, };
enum psi_stat_task_count { NR_MEMCG_RECLAIM, NR_MEMCG_RECLAIM_RUNNING, + NR_GLOBAL_RECLAIM, + NR_GLOBAL_RECLAIM_RUNNING, + NR_COMPACT, + NR_COMPACT_RUNNING, + NR_ASYNC_MEMCG_RECLAIM, + NR_ASYNC_MEMCG_RECLAIM_RUNNING, + NR_SWAP, + NR_SWAP_RUNNING, NR_PSI_STAT_TASK_COUNTS, };
@@ -262,6 +278,10 @@ struct psi_group { }; */ enum psi_memstall_type { PSI_MEMCG_RECLAIM = 1, + PSI_GLOBAL_RECLAIM, + PSI_COMPACT, + PSI_ASYNC_MEMCG_RECLAIM, + PSI_SWAP, };
#endif /* _LINUX_PSI_TYPES_H */ diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 161af4a19493..c5cde57bf8de 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -378,6 +378,26 @@ static void record_stat_times(struct psi_group_ext *psi_ext, int cpu) if (ext_grpc->state_mask & (1 << PSI_MEMCG_RECLAIM_FULL)) ext_grpc->times[PSI_MEMCG_RECLAIM_FULL] += delta; } + if (ext_grpc->state_mask & (1 << PSI_GLOBAL_RECLAIM_SOME)) { + ext_grpc->times[PSI_GLOBAL_RECLAIM_SOME] += delta; + if (ext_grpc->state_mask & (1 << PSI_GLOBAL_RECLAIM_FULL)) + ext_grpc->times[PSI_GLOBAL_RECLAIM_FULL] += delta; + } + if (ext_grpc->state_mask & (1 << PSI_COMPACT_SOME)) { + ext_grpc->times[PSI_COMPACT_SOME] += delta; + if (ext_grpc->state_mask & (1 << PSI_COMPACT_FULL)) + ext_grpc->times[PSI_COMPACT_FULL] += delta; + } + if (ext_grpc->state_mask & (1 << PSI_ASYNC_MEMCG_RECLAIM_SOME)) { + ext_grpc->times[PSI_ASYNC_MEMCG_RECLAIM_SOME] += delta; + if (ext_grpc->state_mask & (1 << PSI_ASYNC_MEMCG_RECLAIM_FULL)) + ext_grpc->times[PSI_ASYNC_MEMCG_RECLAIM_FULL] += delta; + } + if (ext_grpc->state_mask & (1 << PSI_SWAP_SOME)) { + ext_grpc->times[PSI_SWAP_SOME] += delta; + if (ext_grpc->state_mask & (1 << PSI_SWAP_FULL)) + ext_grpc->times[PSI_SWAP_FULL] += delta; + } }
static bool test_fine_grained_stat(unsigned int *stat_tasks, @@ -390,6 +410,26 @@ static bool test_fine_grained_stat(unsigned int *stat_tasks, case PSI_MEMCG_RECLAIM_FULL: return unlikely(stat_tasks[NR_MEMCG_RECLAIM] && nr_running == stat_tasks[NR_MEMCG_RECLAIM_RUNNING]); + case PSI_GLOBAL_RECLAIM_SOME: + return unlikely(stat_tasks[NR_GLOBAL_RECLAIM]); + case PSI_GLOBAL_RECLAIM_FULL: + return unlikely(stat_tasks[NR_GLOBAL_RECLAIM] && + nr_running == stat_tasks[NR_GLOBAL_RECLAIM_RUNNING]); + case PSI_COMPACT_SOME: + return unlikely(stat_tasks[NR_COMPACT]); + case PSI_COMPACT_FULL: + return unlikely(stat_tasks[NR_COMPACT] && + nr_running == stat_tasks[NR_COMPACT_RUNNING]); + case PSI_ASYNC_MEMCG_RECLAIM_SOME: + return unlikely(stat_tasks[NR_ASYNC_MEMCG_RECLAIM]); + case PSI_ASYNC_MEMCG_RECLAIM_FULL: + return unlikely(stat_tasks[NR_ASYNC_MEMCG_RECLAIM] && + nr_running == stat_tasks[NR_ASYNC_MEMCG_RECLAIM_RUNNING]); + case PSI_SWAP_SOME: + return unlikely(stat_tasks[NR_SWAP]); + case PSI_SWAP_FULL: + return unlikely(stat_tasks[NR_SWAP] && + nr_running == stat_tasks[NR_SWAP_RUNNING]); default: return false; } @@ -1893,6 +1933,10 @@ static const struct proc_ops psi_cpu_proc_ops = { #ifdef CONFIG_PSI_FINE_GRAINED static const char *const psi_stat_names[] = { "cgroup_memory_reclaim", + "global_memory_reclaim", + "compact", + "cgroup_async_memory_reclaim", + "swap", };
int psi_stat_show(struct seq_file *m, struct psi_group *group) diff --git a/mm/compaction.c b/mm/compaction.c index 38c8d216c6a3..771e9629b95c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -3061,7 +3061,7 @@ static int kcompactd(void *p) pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
while (!kthread_should_stop()) { - unsigned long pflags; + unsigned long pflags = 0;
/* * Avoid the unnecessary wakeup for proactive compaction diff --git a/mm/filemap.c b/mm/filemap.c index 1c398edcfcaf..d0a2beabc68a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1227,7 +1227,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; bool thrashing = false; - unsigned long pflags; + unsigned long pflags = 0; bool in_thrashing;
if (bit_nr == PG_locked && @@ -1378,7 +1378,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; bool thrashing = false; - unsigned long pflags; + unsigned long pflags = 0; bool in_thrashing; wait_queue_head_t *q; struct folio *folio = page_folio(pfn_swap_entry_to_page(entry)); @@ -2366,7 +2366,7 @@ static int filemap_read_folio(struct file *file, filler_t filler, struct folio *folio) { bool workingset = folio_test_workingset(folio); - unsigned long pflags; + unsigned long pflags = 0; int error;
/* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a3c3a508a24d..74e1fad14a2c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2450,6 +2450,9 @@ static void async_reclaim_high(struct mem_cgroup *memcg) return; }
+#ifdef CONFIG_PSI_FINE_GRAINED + pflags = PSI_ASYNC_MEMCG_RECLAIM; +#endif psi_memstall_enter(&pflags); nr_pages = memcg_usage > safe_pages ? memcg_usage - safe_pages : MEMCG_CHARGE_BATCH; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f5b61c1060d1..798a9ec645c0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3518,6 +3518,9 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, if (!order) return NULL;
+#ifdef CONFIG_PSI_FINE_GRAINED + pflags = PSI_COMPACT; +#endif psi_memstall_enter(&pflags); delayacct_compact_start(); noreclaim_flag = memalloc_noreclaim_save(); @@ -3787,6 +3790,9 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, unsigned long pflags; bool drained = false;
+#ifdef CONFIG_PSI_FINE_GRAINED + pflags = PSI_GLOBAL_RECLAIM; +#endif psi_memstall_enter(&pflags); *did_some_progress = __perform_reclaim(gfp_mask, order, ac); if (unlikely(!(*did_some_progress))) diff --git a/mm/page_io.c b/mm/page_io.c index fe4c21af23f2..95c3616b5db3 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -509,6 +509,9 @@ void swap_readpage(struct page *page, bool synchronous, struct swap_iocb **plug) */ if (workingset) { delayacct_thrashing_start(&in_thrashing); +#ifdef CONFIG_PSI_FINE_GRAINED + pflags = PSI_SWAP; +#endif psi_memstall_enter(&pflags); } delayacct_swapin_start(); diff --git a/mm/readahead.c b/mm/readahead.c index 6925e6959fd3..e09919547c3b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -152,8 +152,12 @@ static void read_pages(struct readahead_control *rac) if (!readahead_count(rac)) return;
- if (unlikely(rac->_workingset)) + if (unlikely(rac->_workingset)) { +#ifdef CONFIG_PSI_FINE_GRAINED + rac->_pflags = 0; +#endif psi_memstall_enter(&rac->_pflags); + } blk_start_plug(&plug);
if (aops->readahead) { @@ -803,6 +807,9 @@ void readahead_expand(struct readahead_control *ractl, if (unlikely(folio_test_workingset(folio)) && !ractl->_workingset) { ractl->_workingset = true; +#ifdef CONFIG_PSI_FINE_GRAINED + ractl->_pflags = 0; +#endif psi_memstall_enter(&ractl->_pflags); } ractl->_nr_pages++; @@ -830,6 +837,9 @@ void readahead_expand(struct readahead_control *ractl, if (unlikely(folio_test_workingset(folio)) && !ractl->_workingset) { ractl->_workingset = true; +#ifdef CONFIG_PSI_FINE_GRAINED + ractl->_pflags = 0; +#endif psi_memstall_enter(&ractl->_pflags); } ractl->_nr_pages++; diff --git a/mm/vmscan.c b/mm/vmscan.c index 6f13394b112e..4d753ce13901 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -7393,7 +7393,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) int i; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; - unsigned long pflags; + unsigned long pflags = 0; unsigned long nr_boost_reclaim; unsigned long zone_boosts[MAX_NR_ZONES] = { 0, }; bool boosted; @@ -8025,6 +8025,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in sc.gfp_mask);
cond_resched(); +#ifdef CONFIG_PSI_FINE_GRAINED + pflags = PSI_GLOBAL_RECLAIM; +#endif psi_memstall_enter(&pflags); fs_reclaim_acquire(sc.gfp_mask); /*