hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NIKC
--------------------------------
Add periodical memory reclaim support. There are three new interfaces:
1) /proc/sys/vm/cache_reclaim_s --- used to set reclaim interval
2) /proc/sys/vm/cache_reclaim_weight --- used to calculate reclaim amount
3) /proc/sys/vm/cache_reclaim_enable --- used to switch on/off this feature
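For example, to reclaim periodically every 120 seconds with the default
weight (the values below are illustrative, not recommendations):

	echo 120 > /proc/sys/vm/cache_reclaim_s
	echo 1 > /proc/sys/vm/cache_reclaim_weight
	echo 1 > /proc/sys/vm/cache_reclaim_enable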
Signed-off-by: Ze Zuo <zuoze1@huawei.com>
---
 Documentation/admin-guide/sysctl/vm.rst |  35 ++++++
 mm/Kconfig                              |  13 +++
 mm/Makefile                             |   1 +
 mm/internal.h                           |   4 +
 mm/page_cache_limit.c                   | 135 ++++++++++++++++++++++++
 mm/vmscan.c                             |  39 +++++++
 6 files changed, 227 insertions(+)
 create mode 100644 mm/page_cache_limit.c
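A worked example of the reclaim amount formula documented below: with the
default cache_reclaim_weight of 1 and SWAP_CLUSTER_MAX of 32 (defined in
include/linux/swap.h), a node with 8 CPUs (an assumed node size) yields

	reclaim_amount = 1 * 32 * 8 = 256 pages

per reclaim interval, i.e. 1 MiB per node with 4 KiB pages.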
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 45ba1f4dc004..dc2705cbdcb8 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -75,6 +75,9 @@ Currently, these files are in /proc/sys/vm:
 - watermark_boost_factor
 - watermark_scale_factor
 - zone_reclaim_mode
+- cache_reclaim_s
+- cache_reclaim_weight
+- cache_reclaim_enable
 
 
 admin_reserve_kbytes
@@ -1044,3 +1047,35 @@ of other processes running on other nodes will not be affected.
 Allowing regular swap effectively restricts allocations to the local
 node unless explicitly overridden by memory policies or cpuset
 configurations.
+
+cache_reclaim_s
+===============
+
+cache_reclaim_s is used to set the reclaim interval of periodical memory
+reclaim. When periodical memory reclaim is enabled, memory is reclaimed
+every cache_reclaim_s seconds.
+
+
+cache_reclaim_weight
+====================
+
+This is the reclaim factor of every periodical reclaim. When periodical
+memory reclaim is enabled, the reclaim amount of every reclaim pass is
+calculated from:
+	reclaim_amount = cache_reclaim_weight * SWAP_CLUSTER_MAX * nr_cpus_node(nid)
+
+SWAP_CLUSTER_MAX is defined in include/linux/swap.h.
+nr_cpus_node is used to obtain the number of CPUs on node nid.
+
+Memory reclaim uses the workqueue mechanism, which blocks the execution
+of subsequent works; if memory reclaim takes a lot of time,
+time-sensitive works may be affected.
+
+Note that if the parameters are not configured properly, such as setting
+too large a memory reclaim amount, it may lead to unstable system
+performance.
+
+cache_reclaim_enable
+====================
+
+This is used to switch the periodical memory reclaim feature on/off.
diff --git a/mm/Kconfig b/mm/Kconfig
index ff0c36f42ca8..df87fab46621 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1323,6 +1323,19 @@ config ASCEND_OOM
 	  0: disable oom killer
 	  1: enable oom killer (default,compatible with mainline)
 
+config PAGE_CACHE_LIMIT
+	bool "Support page cache limit"
+	depends on MMU && SYSCTL
+	default n
+	help
+	  Keeping an amount of page cache can improve the performance of the
+	  system, but too much page cache in the system will result in a
+	  shortage of memory, and subsequent memory reclamation operations
+	  may lead to performance degradation, so add periodical memory
+	  reclaim to avoid too much page cache.
+
+	  If unsure, say N to disable the PAGE_CACHE_LIMIT.
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 6921fedacd07..07cf74abd241 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -141,3 +141,4 @@ obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
 obj-$(CONFIG_SHARE_POOL) += share_pool.o
 obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o
+obj-$(CONFIG_PAGE_CACHE_LIMIT) += page_cache_limit.o
diff --git a/mm/internal.h b/mm/internal.h
index bcb7f95783bf..1ebba69437d6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1157,4 +1157,8 @@ struct vma_prepare {
 
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
 				unsigned long zone, int nid);
+
+#ifdef CONFIG_PAGE_CACHE_LIMIT
+unsigned long shrink_memory(unsigned long nr_to_reclaim, bool may_swap);
+#endif /* CONFIG_PAGE_CACHE_LIMIT */
 #endif /* __MM_INTERNAL_H */
diff --git a/mm/page_cache_limit.c b/mm/page_cache_limit.c
new file mode 100644
index 000000000000..3ad89f21d585
--- /dev/null
+++ b/mm/page_cache_limit.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for periodic memory reclaim and page cache limit
+ */
+
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/sysctl.h>
+#include <linux/workqueue.h>
+
+#include "internal.h"
+
+static int vm_cache_reclaim_s __read_mostly;
+static int vm_cache_reclaim_s_max = 43200;
+static int vm_cache_reclaim_weight __read_mostly = 1;
+static int vm_cache_reclaim_weight_max = 100;
+static int vm_cache_reclaim_enable = 1;
+
+static void shrink_shepherd(struct work_struct *w);
+static DECLARE_DEFERRABLE_WORK(shepherd, shrink_shepherd);
+static struct work_struct vmscan_works[MAX_NUMNODES];
+
+static bool should_periodical_reclaim(void)
+{
+	return vm_cache_reclaim_s && vm_cache_reclaim_enable;
+}
+
+static unsigned long node_reclaim_num(void)
+{
+	int nid = numa_node_id();
+
+	return SWAP_CLUSTER_MAX * nr_cpus_node(nid) * vm_cache_reclaim_weight;
+}
+
+int cache_reclaim_enable_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (should_periodical_reclaim())
+		schedule_delayed_work(&shepherd, round_jiffies_relative(
+				(unsigned long)vm_cache_reclaim_s * HZ));
+
+	return 0;
+}
+
+int cache_reclaim_sysctl_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (should_periodical_reclaim())
+		mod_delayed_work(system_unbound_wq, &shepherd,
+				round_jiffies_relative(
+				(unsigned long)vm_cache_reclaim_s * HZ));
+
+	return ret;
+}
+
+static void shrink_shepherd(struct work_struct *w)
+{
+	int node;
+
+	if (!should_periodical_reclaim())
+		return;
+
+	for_each_online_node(node) {
+		if (!work_pending(&vmscan_works[node]))
+			queue_work_node(node, system_unbound_wq, &vmscan_works[node]);
+	}
+
+	queue_delayed_work(system_unbound_wq, &shepherd,
+		round_jiffies_relative((unsigned long)vm_cache_reclaim_s * HZ));
+}
+
+static void shrink_page_work(struct work_struct *w)
+{
+	shrink_memory(node_reclaim_num(), true);
+}
+
+static void shrink_shepherd_timer(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_WORK(&vmscan_works[i], shrink_page_work);
+}
+
+static struct ctl_table page_cache_limit_table[] = {
+	{
+		.procname = "cache_reclaim_s",
+		.data = &vm_cache_reclaim_s,
+		.maxlen = sizeof(vm_cache_reclaim_s),
+		.mode = 0644,
+		.proc_handler = cache_reclaim_sysctl_handler,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = &vm_cache_reclaim_s_max,
+	},
+	{
+		.procname = "cache_reclaim_weight",
+		.data = &vm_cache_reclaim_weight,
+		.maxlen = sizeof(vm_cache_reclaim_weight),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ONE,
+		.extra2 = &vm_cache_reclaim_weight_max,
+	},
+	{
+		.procname = "cache_reclaim_enable",
+		.data = &vm_cache_reclaim_enable,
+		.maxlen = sizeof(vm_cache_reclaim_enable),
+		.mode = 0644,
+		.proc_handler = cache_reclaim_enable_handler,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	},
+};
+
+static int __init shrink_page_init(void)
+{
+	shrink_shepherd_timer();
+
+	register_sysctl_init("vm", page_cache_limit_table);
+
+	return 0;
+}
+late_initcall(shrink_page_init)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f13394b112e..7a676296af30 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7880,6 +7880,45 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 }
 #endif /* CONFIG_HIBERNATION */
 
+#ifdef CONFIG_PAGE_CACHE_LIMIT
+unsigned long shrink_memory(unsigned long nr_to_reclaim, bool may_swap)
+{
+	unsigned long nr_reclaimed;
+	unsigned int noreclaim_flag;
+	int nid = numa_node_id();
+	struct scan_control sc = {
+		.gfp_mask = GFP_HIGHUSER_MOVABLE,
+		.reclaim_idx = ZONE_MOVABLE,
+		.may_writepage = !laptop_mode,
+		.nr_to_reclaim = nr_to_reclaim / 2,
+		.may_unmap = 1,
+		.may_swap = may_swap,
+		.priority = DEF_PRIORITY,
+	};
+
+	struct zonelist *zonelist = node_zonelist(nid, sc.gfp_mask);
+	struct scan_control orig_sc = sc;
+
+	fs_reclaim_acquire(sc.gfp_mask);
+	noreclaim_flag = memalloc_noreclaim_save();
+	set_task_reclaim_state(current, &sc.reclaim_state);
+
+	/* Start with ZONE_MOVABLE and try to reclaim half of the target memory */
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	sc = orig_sc;
+	sc.reclaim_idx--;
+
+	/* Then try to reclaim the remaining half, starting from ZONE_NORMAL */
+	nr_reclaimed += do_try_to_free_pages(zonelist, &sc);
+
+	set_task_reclaim_state(current, NULL);
+	memalloc_noreclaim_restore(noreclaim_flag);
+	fs_reclaim_release(sc.gfp_mask);
+
+	return nr_reclaimed;
+}
+#endif /* CONFIG_PAGE_CACHE_LIMIT */
+
 /*
  * This kswapd start function will be called by init and node-hot-add.
  */
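A note on the two-pass flow in shrink_memory() above, using the 256-page
target from the earlier worked example (an assumed figure): the first
do_try_to_free_pages() call targets nr_to_reclaim / 2 = 128 pages with
reclaim_idx = ZONE_MOVABLE; scan_control is then reset from orig_sc and
reclaim_idx is lowered by one, so the second call targets the remaining
128 pages starting one zone lower, which is ZONE_NORMAL on a typical
64-bit configuration without highmem.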