To implement this feature, a new function called "shrink_memory" has been
added, which reclaims a specified amount of memory. This patch set provides
the following two capabilities:
1) add periodical memory reclaim
2) add pagecache limit
The above functionality depends on CONFIG_PAGE_CACHE_LIMIT. Interface and
functional tests have been completed successfully.
ChangeLog:
- Some minor changes to descriptions and code, no functional changes
- rename page_cache_shrink_memory to shrink_memory
- move the declaration of shrink_memory to internal.h, no functional changes
- add a description for the shrink_memory function
- update the documentation for using this feature
Ze Zuo (2):
  mm: support periodical memory reclaim
  mm: support pagecache limit
 Documentation/admin-guide/sysctl/vm.rst |  43 +++++
 mm/Kconfig                              |  13 ++
 mm/Makefile                             |   1 +
 mm/internal.h                           |   4 +
 mm/page_cache_limit.c                   | 199 ++++++++++++++++++++++++
 mm/vmscan.c                             |  39 +++++
 6 files changed, 299 insertions(+)
 create mode 100644 mm/page_cache_limit.c
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NIKC
--------------------------------
Add periodical memory reclaim support. There are three new interfaces:
1) /proc/sys/vm/cache_reclaim_s      --- used to set the reclaim interval
2) /proc/sys/vm/cache_reclaim_weight --- used to calculate the reclaim amount
3) /proc/sys/vm/cache_reclaim_enable --- used to switch this feature on/off
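
As a usage illustration only (not part of the patch), the minimal
user-space sketch below enables the feature through the three files
above. The 60-second interval and weight of 10 are arbitrary example
values, not recommendations.

/* Illustrative only: enable periodic page cache reclaim via the new sysctls. */
#include <stdio.h>
#include <stdlib.h>

static void write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	write_sysctl("/proc/sys/vm/cache_reclaim_s", "60");      /* reclaim every 60 seconds */
	write_sysctl("/proc/sys/vm/cache_reclaim_weight", "10"); /* scale the per-pass reclaim amount */
	write_sysctl("/proc/sys/vm/cache_reclaim_enable", "1");  /* switch the feature on */
	return 0;
}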
Signed-off-by: Ze Zuo <zuoze1@huawei.com>
---
 Documentation/admin-guide/sysctl/vm.rst |  35 ++++++
 mm/Kconfig                              |  13 +++
 mm/Makefile                             |   1 +
 mm/internal.h                           |   4 +
 mm/page_cache_limit.c                   | 135 ++++++++++++++++++++++++
 mm/vmscan.c                             |  39 +++++++
 6 files changed, 227 insertions(+)
 create mode 100644 mm/page_cache_limit.c
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 45ba1f4dc004..dc2705cbdcb8 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -75,6 +75,9 @@ Currently, these files are in /proc/sys/vm:
 - watermark_boost_factor
 - watermark_scale_factor
 - zone_reclaim_mode
+- cache_reclaim_s
+- cache_reclaim_weight
+- cache_reclaim_enable
 
 
 admin_reserve_kbytes
@@ -1044,3 +1047,35 @@ of other processes running on other nodes will not be affected.
 Allowing regular swap effectively restricts allocations to the local
 node unless explicitly overridden by memory policies or cpuset
 configurations.
+
+cache_reclaim_s
+===============
+
+Cache_reclaim_s is used to set the reclaim interval of periodical memory
+reclaim. When periodical memory reclaim is enabled, memory is reclaimed
+every cache_reclaim_s seconds.
+
+
+cache_reclaim_weight
+====================
+
+This is the reclaim factor of every periodical reclaim. When periodical
+memory reclaim is enabled, the reclaim amount of each pass can be
+calculated as:
+	reclaim_amount = cache_reclaim_weight * SWAP_CLUSTER_MAX * nr_cpus_node(nid)
+
+SWAP_CLUSTER_MAX is defined in include/linux/swap.h.
+nr_cpus_node is used to obtain the number of CPUs on node nid.
+
+Memory reclaim uses the workqueue mechanism and blocks the execution of
+subsequent work items; if memory reclaim takes a lot of time,
+time-sensitive work may be affected.
+
+Note that if the parameters are not configured properly, such as setting
+too large a memory reclaim amount, it may lead to unstable system
+performance.
+
+cache_reclaim_enable
+====================
+
+This is used to switch the periodical memory reclaim feature on/off.
diff --git a/mm/Kconfig b/mm/Kconfig
index ff0c36f42ca8..df87fab46621 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1323,6 +1323,19 @@ config ASCEND_OOM
 	  0: disable oom killer
 	  1: enable oom killer (default,compatible with mainline)
 
+config PAGE_CACHE_LIMIT
+	bool "Support page cache limit"
+	depends on MMU && SYSCTL
+	default n
+	help
+	  Keeping a number of page cache pages can improve the performance of
+	  the system, but too much page cache in the system will result in a
+	  shortage of memory, and subsequent memory reclamation operations may
+	  lead to performance degradation, so add periodical memory reclaim to
+	  avoid keeping too much page cache.
+
+	  If unsure, say N to disable PAGE_CACHE_LIMIT.
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 6921fedacd07..07cf74abd241 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -141,3 +141,4 @@ obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
 obj-$(CONFIG_SHARE_POOL) += share_pool.o
 obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o
+obj-$(CONFIG_PAGE_CACHE_LIMIT) += page_cache_limit.o
diff --git a/mm/internal.h b/mm/internal.h
index bcb7f95783bf..1ebba69437d6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1157,4 +1157,8 @@ struct vma_prepare {
 
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
 				unsigned long zone, int nid);
+
+#ifdef CONFIG_PAGE_CACHE_LIMIT
+unsigned long shrink_memory(unsigned long nr_to_reclaim, bool may_swap);
+#endif /* CONFIG_PAGE_CACHE_LIMIT */
 #endif /* __MM_INTERNAL_H */
diff --git a/mm/page_cache_limit.c b/mm/page_cache_limit.c
new file mode 100644
index 000000000000..3ad89f21d585
--- /dev/null
+++ b/mm/page_cache_limit.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for periodic memory reclaim and page cache limit
+ */
+
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/sysctl.h>
+#include <linux/workqueue.h>
+
+#include "internal.h"
+
+static int vm_cache_reclaim_s __read_mostly;
+static int vm_cache_reclaim_s_max = 43200;
+static int vm_cache_reclaim_weight __read_mostly = 1;
+static int vm_cache_reclaim_weight_max = 100;
+static int vm_cache_reclaim_enable = 1;
+
+static void shrink_shepherd(struct work_struct *w);
+static DECLARE_DEFERRABLE_WORK(shepherd, shrink_shepherd);
+static struct work_struct vmscan_works[MAX_NUMNODES];
+
+static bool should_periodical_reclaim(void)
+{
+	return vm_cache_reclaim_s && vm_cache_reclaim_enable;
+}
+
+static unsigned long node_reclaim_num(void)
+{
+	int nid = numa_node_id();
+
+	return SWAP_CLUSTER_MAX * nr_cpus_node(nid) * vm_cache_reclaim_weight;
+}
+
+int cache_reclaim_enable_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (should_periodical_reclaim())
+		schedule_delayed_work(&shepherd, round_jiffies_relative(
+				(unsigned long)vm_cache_reclaim_s * HZ));
+
+	return 0;
+}
+
+int cache_reclaim_sysctl_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (should_periodical_reclaim())
+		mod_delayed_work(system_unbound_wq, &shepherd,
+				round_jiffies_relative(
+				(unsigned long)vm_cache_reclaim_s * HZ));
+
+	return ret;
+}
+
+static void shrink_shepherd(struct work_struct *w)
+{
+	int node;
+
+	if (!should_periodical_reclaim())
+		return;
+
+	for_each_online_node(node) {
+		if (!work_pending(&vmscan_works[node]))
+			queue_work_node(node, system_unbound_wq, &vmscan_works[node]);
+	}
+
+	queue_delayed_work(system_unbound_wq, &shepherd,
+		round_jiffies_relative((unsigned long)vm_cache_reclaim_s * HZ));
+}
+
+static void shrink_page_work(struct work_struct *w)
+{
+	shrink_memory(node_reclaim_num(), true);
+}
+
+static void shrink_shepherd_timer(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_WORK(&vmscan_works[i], shrink_page_work);
+}
+
+static struct ctl_table page_cache_limit_table[] = {
+	{
+		.procname = "cache_reclaim_s",
+		.data = &vm_cache_reclaim_s,
+		.maxlen = sizeof(vm_cache_reclaim_s),
+		.mode = 0644,
+		.proc_handler = cache_reclaim_sysctl_handler,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = &vm_cache_reclaim_s_max,
+	},
+	{
+		.procname = "cache_reclaim_weight",
+		.data = &vm_cache_reclaim_weight,
+		.maxlen = sizeof(vm_cache_reclaim_weight),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ONE,
+		.extra2 = &vm_cache_reclaim_weight_max,
+	},
+	{
+		.procname = "cache_reclaim_enable",
+		.data = &vm_cache_reclaim_enable,
+		.maxlen = sizeof(vm_cache_reclaim_enable),
+		.mode = 0644,
+		.proc_handler = cache_reclaim_enable_handler,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	},
+};
+
+static int __init shrink_page_init(void)
+{
+	shrink_shepherd_timer();
+
+	register_sysctl_init("vm", page_cache_limit_table);
+
+	return 0;
+}
+late_initcall(shrink_page_init)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f13394b112e..7a676296af30 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7880,6 +7880,45 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 }
 #endif /* CONFIG_HIBERNATION */
 
+#ifdef CONFIG_PAGE_CACHE_LIMIT
+unsigned long shrink_memory(unsigned long nr_to_reclaim, bool may_swap)
+{
+	unsigned long nr_reclaimed;
+	unsigned int noreclaim_flag;
+	int nid = numa_node_id();
+	struct scan_control sc = {
+		.gfp_mask = GFP_HIGHUSER_MOVABLE,
+		.reclaim_idx = ZONE_MOVABLE,
+		.may_writepage = !laptop_mode,
+		.nr_to_reclaim = nr_to_reclaim / 2,
+		.may_unmap = 1,
+		.may_swap = may_swap,
+		.priority = DEF_PRIORITY,
+	};
+
+	struct zonelist *zonelist = node_zonelist(nid, sc.gfp_mask);
+	struct scan_control orig_sc = sc;
+
+	fs_reclaim_acquire(sc.gfp_mask);
+	noreclaim_flag = memalloc_noreclaim_save();
+	set_task_reclaim_state(current, &sc.reclaim_state);
+
+	/* Start with ZONE_MOVABLE and try to reclaim half of the target memory */
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	sc = orig_sc;
+	sc.reclaim_idx--;
+
+	/* Then try to reclaim the remaining half starting from ZONE_NORMAL */
+	nr_reclaimed += do_try_to_free_pages(zonelist, &sc);
+
+	set_task_reclaim_state(current, NULL);
+	memalloc_noreclaim_restore(noreclaim_flag);
+	fs_reclaim_release(sc.gfp_mask);
+
+	return nr_reclaimed;
+}
+#endif /* CONFIG_PAGE_CACHE_LIMIT */
+
 /*
  * This kswapd start function will be called by init and node-hot-add.
  */
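
For a rough sense of scale (illustrative numbers only, assuming the
mainline SWAP_CLUSTER_MAX of 32 and 4 KiB pages), the per-interval
reclaim target documented in vm.rst above works out to:

  reclaim_amount = cache_reclaim_weight * SWAP_CLUSTER_MAX * nr_cpus_node(nid)
                 = 1 * 32 * 4 = 128 pages (512 KiB) for the default weight on a 4-CPU node
                 = 100 * 32 * 4 = 12800 pages (50 MiB) for the maximum weight of 100

shrink_memory() then splits whatever target it is given in half between
a ZONE_MOVABLE pass and a second pass one zone index lower.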
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NIKC
--------------------------------
Add /proc/sys/vm/cache_limit_mbytes to set the page cache limit. This
interface sets the upper limit of page cache: if page cache usage
exceeds cache_limit_mbytes, memory reclaim is triggered. The reclaim
size and reclaim interval are decided by the
/proc/sys/vm/cache_reclaim_weight and /proc/sys/vm/cache_reclaim_s
interfaces introduced in the previous patch.
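
As a usage illustration only (not part of the patch), the sketch below
sets the new limit to roughly half of total RAM; the 50% figure is an
arbitrary example, and, per the handler added here, values larger than
total RAM in megabytes are rejected with EINVAL.

/* Illustrative only: cap the page cache at about 50% of RAM. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long pages = sysconf(_SC_PHYS_PAGES);
	long page_kb = sysconf(_SC_PAGESIZE) / 1024;
	long total_mb = pages / 1024 * page_kb;
	FILE *f = fopen("/proc/sys/vm/cache_limit_mbytes", "w");

	if (!f) {
		perror("/proc/sys/vm/cache_limit_mbytes");
		return 1;
	}
	fprintf(f, "%ld\n", total_mb / 2);	/* example value: half of total RAM */
	fclose(f);
	return 0;
}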
Signed-off-by: Ze Zuo <zuoze1@huawei.com>
---
 Documentation/admin-guide/sysctl/vm.rst |  8 ++++
 mm/page_cache_limit.c                   | 64 +++++++++++++++++++++++
 2 files changed, 72 insertions(+)
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index dc2705cbdcb8..2025b524028c 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -78,6 +78,7 @@ Currently, these files are in /proc/sys/vm:
 - cache_reclaim_s
 - cache_reclaim_weight
 - cache_reclaim_enable
+- cache_limit_mbytes
 
 
 admin_reserve_kbytes
@@ -1079,3 +1080,10 @@ cache_reclaim_enable
 ====================
 
 This is used to switch the periodical memory reclaim feature on/off.
+
+
+cache_limit_mbytes
+==================
+
+This is used to set the upper limit of page cache in megabytes.
+Page cache will be reclaimed periodically if page cache is over the limit.
diff --git a/mm/page_cache_limit.c b/mm/page_cache_limit.c
index 3ad89f21d585..1ab00225f8ac 100644
--- a/mm/page_cache_limit.c
+++ b/mm/page_cache_limit.c
@@ -15,6 +15,7 @@ static int vm_cache_reclaim_s_max = 43200;
 static int vm_cache_reclaim_weight __read_mostly = 1;
 static int vm_cache_reclaim_weight_max = 100;
 static int vm_cache_reclaim_enable = 1;
+static unsigned long vm_cache_limit_mbytes __read_mostly;
 
 static void shrink_shepherd(struct work_struct *w);
 static DECLARE_DEFERRABLE_WORK(shepherd, shrink_shepherd);
@@ -32,6 +33,31 @@ static unsigned long node_reclaim_num(void)
 	return SWAP_CLUSTER_MAX * nr_cpus_node(nid) * vm_cache_reclaim_weight;
 }
 
+static bool page_cache_over_limit(void)
+{
+	unsigned long lru_file;
+	unsigned long limit;
+
+	limit = vm_cache_limit_mbytes << (20 - PAGE_SHIFT);
+	lru_file = global_node_page_state(NR_ACTIVE_FILE) +
+			global_node_page_state(NR_INACTIVE_FILE);
+	if (lru_file > limit)
+		return true;
+
+	return false;
+}
+
+static bool should_reclaim_page_cache(void)
+{
+	if (!should_periodical_reclaim())
+		return false;
+
+	if (!vm_cache_limit_mbytes)
+		return false;
+
+	return true;
+}
+
 int cache_reclaim_enable_handler(struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos)
 {
@@ -65,6 +91,37 @@ int cache_reclaim_sysctl_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
+int cache_limit_mbytes_sysctl_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+	unsigned long vm_cache_limit_mbytes_max;
+	unsigned long origin_mbytes = vm_cache_limit_mbytes;
+	int nr_retries = MAX_RECLAIM_RETRIES;
+
+	vm_cache_limit_mbytes_max = totalram_pages() >> (20 - PAGE_SHIFT);
+	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (vm_cache_limit_mbytes > vm_cache_limit_mbytes_max) {
+		vm_cache_limit_mbytes = origin_mbytes;
+		return -EINVAL;
+	}
+
+	if (write) {
+		while (should_reclaim_page_cache() && page_cache_over_limit() &&
+				nr_retries--) {
+			if (signal_pending(current))
+				return -EINTR;
+
+			shrink_memory(node_reclaim_num(), false);
+		}
+	}
+
+	return 0;
+}
+
 static void shrink_shepherd(struct work_struct *w)
 {
 	int node;
@@ -122,6 +179,13 @@ static struct ctl_table page_cache_limit_table[] = {
 		.extra1 = SYSCTL_ZERO,
 		.extra2 = SYSCTL_ONE,
 	},
+	{
+		.procname = "cache_limit_mbytes",
+		.data = &vm_cache_limit_mbytes,
+		.maxlen = sizeof(vm_cache_limit_mbytes),
+		.mode = 0644,
+		.proc_handler = cache_limit_mbytes_sysctl_handler,
+	},
 };
 
 static int __init shrink_page_init(void)
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing
list have been converted to a pull request successfully!
Pull request link:
https://gitee.com/openeuler/kernel/pulls/3582
Mailing list address:
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...