
From: Nanyong Sun <sunnanyong@huawei.com> Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAOXER -------------------------------- The thp_ctl module creates an interface, /dev/thp_ctl, that lets userspace control THP. A process can use ioctl() to control whether THP is used. To change another process's THP usage, the caller must possess the CAP_SYS_NICE privilege. Currently, only PMD-mapped THP control is supported; more sizes can be supported in the future. A tool that simplifies usage will be added in a later patch. Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> --- arch/arm64/configs/openeuler_defconfig | 2 + arch/x86/configs/openeuler_defconfig | 2 + include/uapi/linux/thp_ctl.h | 19 +++ kernel/fork.c | 1 + mm/Kconfig | 7 ++ mm/Makefile | 1 + mm/thp_ctl.c | 157 +++++++++++++++++++++++++ 7 files changed, 189 insertions(+) create mode 100644 include/uapi/linux/thp_ctl.h create mode 100644 mm/thp_ctl.c diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 198e96de5ba8..b331d8b4ff85 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1243,6 +1243,8 @@ CONFIG_DAMON_SYSFS=y CONFIG_DAMON_RECLAIM=y CONFIG_DAMON_LRU_SORT=y # end of Data Access Monitoring + +CONFIG_THP_CONTROL=y # end of Memory Management options CONFIG_NET=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 0469a5a77683..104b022ad379 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1218,6 +1218,8 @@ CONFIG_DAMON_SYSFS=y CONFIG_DAMON_RECLAIM=y CONFIG_DAMON_LRU_SORT=y # end of Data Access Monitoring + +# CONFIG_THP_CONTROL is not set # end of Memory Management options CONFIG_NET=y diff --git a/include/uapi/linux/thp_ctl.h b/include/uapi/linux/thp_ctl.h new file mode 100644 index 000000000000..89af9f508919 --- /dev/null +++ 
b/include/uapi/linux/thp_ctl.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright (C) 2024 Huawei Technologies Co., Ltd. */ + +#ifndef _THP_CTL_H +#define _THP_CTL_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +struct get_thp_status_arg { /* argument block of IOC_THP_STATUS_GET */ + __kernel_pid_t pid; /* in: pid of the process to query; negative values are rejected */ + unsigned long thp_enable; /* out: 1 if THP is not disabled for the process, 0 if it is. NOTE(review): unsigned long makes the layout differ between 32-bit and 64-bit userspace - confirm this is intended */ +}; + +#define IOC_THP_STATUS_GET _IOWR('M', 0, struct get_thp_status_arg) /* query THP status; arg points to struct get_thp_status_arg */ +#define IOC_THP_SET_DISABLE _IOW('M', 1, __kernel_pid_t) /* disable THP for a process; arg points to its pid */ +#define IOC_THP_SET_ENABLE _IOW('M', 2, __kernel_pid_t) /* re-enable THP for a process; arg points to its pid */ + +#endif diff --git a/kernel/fork.c b/kernel/fork.c index 4b37cb915f7b..96c6a9e446ac 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1644,6 +1644,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } +EXPORT_SYMBOL_GPL(mm_access); static void complete_vfork_done(struct task_struct *tsk) { diff --git a/mm/Kconfig b/mm/Kconfig index 88addd002bb5..bdd8372552ff 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1482,4 +1482,11 @@ config NUMABALANCING_MEM_SAMPLING source "mm/damon/Kconfig" +config THP_CONTROL + tristate "Support thp process control" + depends on TRANSPARENT_HUGEPAGE + default n + help + This provides interface to control thp policy. + endmenu diff --git a/mm/Makefile b/mm/Makefile index 674777b7c99f..5e45f01f56ce 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -148,3 +148,4 @@ obj-$(CONFIG_CLEAR_FREELIST_PAGE) += clear_freelist_page.o obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o obj-$(CONFIG_DYNAMIC_POOL) += dynamic_pool.o obj-$(CONFIG_MEM_SAMPLING) += mem_sampling.o +obj-$(CONFIG_THP_CONTROL) += thp_ctl.o diff --git a/mm/thp_ctl.c b/mm/thp_ctl.c new file mode 100644 index 000000000000..11acec1432f4 --- /dev/null +++ b/mm/thp_ctl.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Huawei Technologies Co., Ltd. 
*/
+
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+#include <linux/uaccess.h>
+#include <linux/thp_ctl.h>
+
+/* Operation selector passed to thp_status_op(). */
+enum thp_ops {
+	THP_OPS_SET,	/* set or clear MMF_DISABLE_THP on the target mm */
+	THP_OPS_GET	/* report whether MMF_DISABLE_THP is clear */
+};
+
+/*
+ * get_mm_by_pid() - look up a task by pid and take a reference on its mm.
+ * @pid: pid of the target task, resolved with find_get_task_by_vpid().
+ *
+ * The caller needs PTRACE_MODE_READ_FSCREDS access to the target and, when
+ * the resulting mm is not its own, CAP_SYS_NICE as well.
+ *
+ * Return: the mm on success (the caller releases it with mmput()), or an
+ * ERR_PTR(): -ESRCH if no task matches @pid or the task has no mm, the error
+ * reported by mm_access() if access was refused, or -EPERM if the caller
+ * targets a foreign mm without CAP_SYS_NICE.
+ */
+static inline struct mm_struct *get_mm_by_pid(pid_t pid)
+{
+	struct task_struct *task;
+	struct mm_struct *mm;
+	int ret;
+
+	task = find_get_task_by_vpid(pid);
+	if (!task)
+		return ERR_PTR(-ESRCH);
+
+	/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+	if (IS_ERR_OR_NULL(mm)) {
+		/*
+		 * Keep the error mm_access() reported instead of folding it
+		 * into -ESRCH, so that userspace can tell a permission
+		 * problem from a missing process. NULL means no mm at all.
+		 */
+		ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+		goto release_task;
+	}
+
+	if (mm != current->mm && !capable(CAP_SYS_NICE)) {
+		ret = -EPERM;
+		goto release_mm;
+	}
+
+	/* The mm reference is handed to the caller; the task one is not. */
+	put_task_struct(task);
+	return mm;
+
+release_mm:
+	mmput(mm);
+release_task:
+	put_task_struct(task);
+	return ERR_PTR(ret);
+}
+
+/* Return 1 if MMF_DISABLE_THP is clear in @mm->flags, 0 if it is set. */
+static int mm_get_thp_status(struct mm_struct *mm)
+{
+	return !test_bit(MMF_DISABLE_THP, &mm->flags);
+}
+
+/*
+ * mm_set_thp_status() - clear (@enable) or set (!@enable) MMF_DISABLE_THP.
+ * @mm: target address space.
+ * @enable: true to allow THP again, false to disable it.
+ *
+ * Return: 0 on success or when the flag already has the requested value,
+ * -EINTR if taking the mmap write lock was interrupted.
+ */
+static int mm_set_thp_status(struct mm_struct *mm, bool enable)
+{
+	/* Nothing to change: skip taking the mmap write lock. */
+	if (enable == !test_bit(MMF_DISABLE_THP, &mm->flags))
+		return 0;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	if (enable)
+		clear_bit(MMF_DISABLE_THP, &mm->flags);
+	else
+		set_bit(MMF_DISABLE_THP, &mm->flags);
+
+	mmap_write_unlock(mm);
+
+	return 0;
+}
+
+/* If ops is THP_OPS_GET, @enable is ignored.
*/
+/*
+ * thp_status_op() - run a THP get/set operation on the mm of task @pid.
+ * @pid: pid of the target task, passed to get_mm_by_pid().
+ * @ops: THP_OPS_SET to change the THP state; any other value reads it.
+ * @enable: requested state for THP_OPS_SET.
+ *
+ * Return: a negative errno on failure. On success, 0 for THP_OPS_SET and
+ * the value of mm_get_thp_status() (0 or 1) for THP_OPS_GET.
+ */
+static int thp_status_op(pid_t pid, enum thp_ops ops, bool enable)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_mm_by_pid(pid);
+	if (IS_ERR(mm))
+		return PTR_ERR(mm);
+
+	if (ops == THP_OPS_SET)
+		ret = mm_set_thp_status(mm, enable);
+	else
+		ret = mm_get_thp_status(mm);
+	/* Drop the mm reference taken by get_mm_by_pid(). */
+	mmput(mm);
+
+	return ret;
+}
+
+/*
+ * get_thp_status() - IOC_THP_STATUS_GET handler.
+ * @ubuf: user buffer; ->pid is read, ->thp_enable is written on success.
+ *
+ * Return: 0 on success, -EFAULT if @ubuf cannot be accessed, -EINVAL for a
+ * negative pid, or the error returned by thp_status_op().
+ */
+static long get_thp_status(struct get_thp_status_arg __user *ubuf)
+{
+	pid_t pid;
+	int ret;
+
+	if (get_user(pid, &ubuf->pid))
+		return -EFAULT;
+
+	if (pid < 0)
+		return -EINVAL;
+
+	ret = thp_status_op(pid, THP_OPS_GET, false);
+	if (ret < 0)
+		return ret;
+
+	if (put_user((unsigned long)ret, &ubuf->thp_enable))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * set_thp_status() - IOC_THP_SET_ENABLE / IOC_THP_SET_DISABLE handler.
+ * @pidp: user pointer to the pid of the target task.
+ * @enable: true to enable THP, false to disable it.
+ *
+ * Return: 0 on success, -EFAULT if @pidp cannot be read, -EINVAL for a
+ * negative pid, or the error returned by thp_status_op().
+ */
+static long set_thp_status(pid_t __user *pidp, bool enable)
+{
+	pid_t pid;
+
+	if (get_user(pid, pidp))
+		return -EFAULT;
+
+	if (pid < 0)
+		return -EINVAL;
+
+	return (long)thp_status_op(pid, THP_OPS_SET, enable);
+}
+
+/*
+ * thp_ctl_ioctl() - ioctl entry point of /dev/thp_ctl.
+ * @f: opened device file (unused).
+ * @ioctl: command number, one of the IOC_THP_* values.
+ * @arg: user pointer whose type depends on @ioctl.
+ *
+ * Return: the handler's result, or -ENOTTY for an unknown command.
+ */
+static long thp_ctl_ioctl(struct file *f, unsigned int ioctl,
+			  unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case IOC_THP_STATUS_GET:
+		return get_thp_status(argp);
+	case IOC_THP_SET_ENABLE:
+		return set_thp_status(argp, true);
+	case IOC_THP_SET_DISABLE:
+		return set_thp_status(argp, false);
+	default:
+		/* Unknown ioctl commands are reported as -ENOTTY. */
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations thp_ctl_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = thp_ctl_ioctl,
+	/*
+	 * Every argument is a user pointer, so compat tasks can share the
+	 * native handler. The pid-only SET commands have the same layout in
+	 * 32-bit userspace. IOC_THP_STATUS_GET embeds an unsigned long, so
+	 * its 32-bit command number differs and is rejected with -ENOTTY.
+	 */
+	.compat_ioctl = compat_ptr_ioctl,
+};
+
+/* Misc device /dev/thp_ctl, created with mode 0600 and a dynamic minor. */
+static struct miscdevice thp_ctl_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "thp_ctl",
+	.fops = &thp_ctl_fops,
+	.mode = 0600,
+};
+module_misc_device(thp_ctl_misc);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nanyong Sun");
+MODULE_DESCRIPTION("Control process thp policy");
-- 
2.25.1