[PATCH OLK-6.6 v2 0/2] Support THP policy control per process

A tool and a driver are added to control the THP policy per process. Currently, only PMD-size THP control is supported. In the future, more functions can be supported based on this tool and driver. v2: modify openeuler_defconfig and commit msg format. Nanyong Sun (2): mm: add thp process control interface module mm: tool: add task_thpctl to control thp per process arch/arm64/configs/openeuler_defconfig | 2 + arch/x86/configs/openeuler_defconfig | 2 + include/uapi/linux/thp_ctl.h | 19 +++ kernel/fork.c | 1 + mm/Kconfig | 7 ++ mm/Makefile | 1 + mm/thp_ctl.c | 157 ++++++++++++++++++++++++ tools/mm/Makefile | 4 +- tools/mm/task_thpctl.c | 161 +++++++++++++++++++++++++ 9 files changed, 352 insertions(+), 2 deletions(-) create mode 100644 include/uapi/linux/thp_ctl.h create mode 100644 mm/thp_ctl.c create mode 100644 tools/mm/task_thpctl.c -- 2.25.1

From: Nanyong Sun <sunnanyong@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAOXER -------------------------------- The thp_ctl module creates an interface, /dev/thp_ctl, that lets userspace control THP. A process can use ioctl() to control whether it uses THP. To change another process's THP usage, the caller must possess the CAP_SYS_NICE privilege. Currently, only PMD-mapped THP control is supported. In the future, more sizes can be supported. A tool added in a later patch simplifies usage. Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> --- arch/arm64/configs/openeuler_defconfig | 2 + arch/x86/configs/openeuler_defconfig | 2 + include/uapi/linux/thp_ctl.h | 19 +++ kernel/fork.c | 1 + mm/Kconfig | 7 ++ mm/Makefile | 1 + mm/thp_ctl.c | 157 +++++++++++++++++++++++++ 7 files changed, 189 insertions(+) create mode 100644 include/uapi/linux/thp_ctl.h create mode 100644 mm/thp_ctl.c diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 198e96de5ba8..b331d8b4ff85 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1243,6 +1243,8 @@ CONFIG_DAMON_SYSFS=y CONFIG_DAMON_RECLAIM=y CONFIG_DAMON_LRU_SORT=y # end of Data Access Monitoring + +CONFIG_THP_CONTROL=y # end of Memory Management options CONFIG_NET=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 0469a5a77683..79757b106e20 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1218,6 +1218,8 @@ CONFIG_DAMON_SYSFS=y CONFIG_DAMON_RECLAIM=y CONFIG_DAMON_LRU_SORT=y # end of Data Access Monitoring + +# CONFIG_THP_CONTROL is not set # end of Memory Management options CONFIG_NET=y diff --git a/include/uapi/linux/thp_ctl.h b/include/uapi/linux/thp_ctl.h new file mode 100644 index 000000000000..89af9f508919 --- /dev/null +++ 
b/include/uapi/linux/thp_ctl.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright (C) 2024 Huawei Technologies Co., Ltd. */ + +#ifndef _THP_CTL_H +#define _THP_CTL_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +struct get_thp_status_arg { + __kernel_pid_t pid; + unsigned long thp_enable; +}; + +#define IOC_THP_STATUS_GET _IOWR('M', 0, struct get_thp_status_arg) +#define IOC_THP_SET_DISABLE _IOW('M', 1, __kernel_pid_t) +#define IOC_THP_SET_ENABLE _IOW('M', 2, __kernel_pid_t) + +#endif diff --git a/kernel/fork.c b/kernel/fork.c index 4b37cb915f7b..96c6a9e446ac 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1644,6 +1644,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } +EXPORT_SYMBOL_GPL(mm_access); static void complete_vfork_done(struct task_struct *tsk) { diff --git a/mm/Kconfig b/mm/Kconfig index 88addd002bb5..bdd8372552ff 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1482,4 +1482,11 @@ config NUMABALANCING_MEM_SAMPLING source "mm/damon/Kconfig" +config THP_CONTROL + tristate "Support thp process control" + depends on TRANSPARENT_HUGEPAGE + default n + help + This provides interface to control thp policy. + endmenu diff --git a/mm/Makefile b/mm/Makefile index 674777b7c99f..5e45f01f56ce 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -148,3 +148,4 @@ obj-$(CONFIG_CLEAR_FREELIST_PAGE) += clear_freelist_page.o obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o obj-$(CONFIG_DYNAMIC_POOL) += dynamic_pool.o obj-$(CONFIG_MEM_SAMPLING) += mem_sampling.o +obj-$(CONFIG_THP_CONTROL) += thp_ctl.o diff --git a/mm/thp_ctl.c b/mm/thp_ctl.c new file mode 100644 index 000000000000..11acec1432f4 --- /dev/null +++ b/mm/thp_ctl.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Huawei Technologies Co., Ltd. 
*/ + +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/file.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <linux/thp_ctl.h> + +enum thp_ops { + THP_OPS_SET, + THP_OPS_GET +}; + +static inline struct mm_struct *get_mm_by_pid(pid_t pid) +{ + struct task_struct *task; + struct mm_struct *mm; + int ret; + + task = find_get_task_by_vpid(pid); + if (!task) + return ERR_PTR(-ESRCH); + + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = -ESRCH; + goto release_task; + } + + if (mm != current->mm && !capable(CAP_SYS_NICE)) { + ret = -EPERM; + goto release_mm; + } + + put_task_struct(task); + return mm; + +release_mm: + mmput(mm); +release_task: + put_task_struct(task); + return ERR_PTR(ret); +} + +static int mm_get_thp_status(struct mm_struct *mm) +{ + return !test_bit(MMF_DISABLE_THP, &mm->flags); +} + +static int mm_set_thp_status(struct mm_struct *mm, bool enable) +{ + if (enable == !test_bit(MMF_DISABLE_THP, &mm->flags)) + return 0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (enable) + clear_bit(MMF_DISABLE_THP, &mm->flags); + else + set_bit(MMF_DISABLE_THP, &mm->flags); + + mmap_write_unlock(mm); + + return 0; +} + +/* if ops is THP_OPS_GET, #enable is ignore. 
*/ +static int thp_status_op(pid_t pid, enum thp_ops ops, bool enable) +{ + struct mm_struct *mm; + int ret; + + mm = get_mm_by_pid(pid); + if (IS_ERR(mm)) + return PTR_ERR(mm); + + if (ops == THP_OPS_SET) + ret = mm_set_thp_status(mm, enable); + else + ret = mm_get_thp_status(mm); + mmput(mm); + + return ret; +} + +static long get_thp_status(struct get_thp_status_arg __user *ubuf) +{ + pid_t pid; + int ret; + + if (get_user(pid, &ubuf->pid)) + return -EFAULT; + + if (pid < 0) + return -EINVAL; + + ret = thp_status_op(pid, THP_OPS_GET, false); + if (ret < 0) + return ret; + + if (put_user((unsigned long)ret, &ubuf->thp_enable)) + return -EFAULT; + + return 0; +} + +static long set_thp_status(pid_t __user *pidp, bool enable) +{ + pid_t pid; + + if (get_user(pid, pidp)) + return -EFAULT; + + if (pid < 0) + return -EINVAL; + + return (long)thp_status_op(pid, THP_OPS_SET, enable); +} + +static long thp_ctl_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case IOC_THP_STATUS_GET: + return get_thp_status(argp); + case IOC_THP_SET_ENABLE: + return set_thp_status(argp, true); + case IOC_THP_SET_DISABLE: + return set_thp_status(argp, false); + default: + return -EINVAL; + } +} + +static const struct file_operations thp_ctl_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = thp_ctl_ioctl, +}; + +static struct miscdevice thp_ctl_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "thp_ctl", + .fops = &thp_ctl_fops, + .mode = 0600, +}; +module_misc_device(thp_ctl_misc); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Nanyong Sun"); +MODULE_DESCRIPTION("Control process thp policy"); -- 2.25.1

From: Nanyong Sun <sunnanyong@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAOXER --------------------------------------- The task_thpctl tool uses /dev/thp_ctl to control the THP usage of a process. Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> --- tools/mm/Makefile | 4 +- tools/mm/task_thpctl.c | 161 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 tools/mm/task_thpctl.c diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 7bb03606b9ea..902d0e4f349a 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -3,7 +3,7 @@ # include ../scripts/Makefile.include -BUILD_TARGETS=page-types slabinfo page_owner_sort +BUILD_TARGETS=page-types slabinfo page_owner_sort task_thpctl INSTALL_TARGETS = $(BUILD_TARGETS) thpmaps LIB_DIR = ../lib/api @@ -23,7 +23,7 @@ $(LIBS): $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) clean: - $(RM) page-types slabinfo page_owner_sort + $(RM) page-types slabinfo page_owner_sort task_thpctl make -C $(LIB_DIR) clean sbindir ?= /usr/sbin diff --git a/tools/mm/task_thpctl.c b/tools/mm/task_thpctl.c new file mode 100644 index 000000000000..3a01673c9e82 --- /dev/null +++ b/tools/mm/task_thpctl.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * task_thpctl.c - Show or change the thp behavior of a process + * + * Copyright (C) 2024 Nanyong Sun <sunnanyong@huawei.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <getopt.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <stdbool.h> +#include <sys/ioctl.h> +#include <linux/thp_ctl.h> + +enum thp_actions { + ACTION_NONE, + GET_THP_STATUS, + THP_SET_DISABLE, + THP_SET_ENABLE, +}; + +#define THP_CTL_PATH "/dev/thp_ctl" + +static void print_usage(void) +{ + printf("Usage:\n"); + printf(" task_thpctl [options]\n"); + printf("\n"); + 
printf("Description:\n"); + printf(" Show or change the thp behavior of a process.\n"); + printf("\n"); + printf("Options:\n"); + printf(" -h, --help This message.\n"); + printf(" -p, --pid <pid> Operate on existing given pid.\n"); + printf(" -g, --get_thp_status Display thp status of a process\n"); + printf(" specified by --pid.\n"); + printf(" -s, --thp_set_enable Set thp enable for a process\n"); + printf(" specified by --pid.\n"); + printf(" -d, --thp_set_disable Set thp disable for a process\n"); + printf(" specified by --pid.\n"); + printf("\n"); + printf("Examples:\n"); + printf(" task_thpctl -s -p <pid>\n"); + printf(" task_thpctl -d -p <pid>\n"); +} + +static void exit_with_help(void) +{ + fprintf(stderr, "Try 'task_thpctl --help' for more information.\n"); + exit(EXIT_FAILURE); +} + +static void get_task_thp_status(int fd, pid_t pid) +{ + struct get_thp_status_arg stat = { + .pid = pid, + .thp_enable = 0 + }; + int err; + + err = ioctl(fd, IOC_THP_STATUS_GET, &stat); + if (err < 0) { + fprintf(stderr, "Task:%d get thp status failed: %s\n", + pid, strerror(errno)); + exit(EXIT_FAILURE); + } + + printf("Task %d thp status:\n", pid); + printf(" thp_enable: %ld\n", stat.thp_enable); +} + +static void set_task_thp_enable(int fd, pid_t pid, bool enable) +{ + unsigned int cmd = enable ? IOC_THP_SET_ENABLE : IOC_THP_SET_DISABLE; + int err; + + err = ioctl(fd, cmd, &pid); + if (err < 0) { + fprintf(stderr, "Task:%d set thp %s failed: %s\n", + pid, enable ? 
"enable" : "disable", + strerror(errno)); + exit(EXIT_FAILURE); + } +} + +static const char optstring[] = "+hp:gsd"; +static const struct option longopts[] = { + {"help", 0, NULL, 'h'}, + {"pid", required_argument, NULL, 'p'}, + {"get_thp_status", 0, NULL, 'g'}, + {"thp_set_enable", 0, NULL, 's'}, + {"thp_set_disable", 0, NULL, 'd'}, + {0, 0, NULL, 0} +}; + +int main(int argc, char **argv) +{ + enum thp_actions action = ACTION_NONE; + pid_t pid = 0; + int opt, fd; + + while ((opt = getopt_long(argc, argv, optstring, longopts, NULL)) != -1) { + switch (opt) { + case 'p': + if (optarg) + pid = atoi(optarg); + if (!pid) { + fprintf(stderr, "invalid PID argument\n"); + exit_with_help(); + } + break; + case 's': + action = THP_SET_ENABLE; + break; + case 'd': + action = THP_SET_DISABLE; + break; + case 'g': + action = GET_THP_STATUS; + break; + case 'h': + print_usage(); + exit(EXIT_SUCCESS); + default: + exit_with_help(); + } + } + + if (action == ACTION_NONE || !pid) + exit_with_help(); + + fd = open(THP_CTL_PATH, O_RDWR); + if (fd < 0) { + fprintf(stderr, "Open %s failed: %s\n", + THP_CTL_PATH, strerror(errno)); + exit(EXIT_FAILURE); + } + + switch (action) { + case GET_THP_STATUS: + get_task_thp_status(fd, pid); + exit(EXIT_SUCCESS); + case THP_SET_ENABLE: + set_task_thp_enable(fd, pid, true); + break; + case THP_SET_DISABLE: + set_task_thp_enable(fd, pid, false); + break; + default: + exit_with_help(); + } + + close(fd); + + return EXIT_SUCCESS; +} -- 2.25.1

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/16509 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/VRN... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/16509 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/VRN...
participants (2)
-
patchwork bot
-
Tong Tiangen