hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9GZAQ
CVE: NA
--------------------------------
Introduce the BPF helper bpf_sched_set_curr_preferred_node() to set the preferred nodes of the current task's relationship group.
Signed-off-by: Hui Tang <tanghui20@huawei.com>
---
 include/linux/sched/relationship.h |  6 ++++++
 include/uapi/linux/bpf.h           |  7 +++++++
 kernel/sched/bpf_sched.c           | 20 ++++++++++++++++++++
 kernel/sched/fair.c                | 10 ++++++++++
 scripts/bpf_helpers_doc.py         |  4 ++++
 tools/include/uapi/linux/bpf.h     |  7 +++++++
 tools/lib/bpf/libbpf_sched.h       | 29 ++++++++++++++++++++++++++++-
 7 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched/relationship.h b/include/linux/sched/relationship.h index fbc5c2bab5dc..43aa3f9706d4 100644 --- a/include/linux/sched/relationship.h +++ b/include/linux/sched/relationship.h @@ -45,6 +45,10 @@ struct bpf_relationship_get_args { struct bpf_net_relationship net; };
+struct bpf_relationship_set_args { + nodemask_t preferred_node; +}; + struct relationship_hdr { refcount_t refcount; spinlock_t lock; @@ -161,6 +165,8 @@ extern void numa_faults_update_and_sort(int nid, int new, extern void task_tick_relationship(struct rq *rq, struct task_struct *curr);
extern void task_preferred_node_work(struct callback_head *work); +extern void +sched_set_curr_preferred_node(struct bpf_relationship_set_args *args);
DECLARE_STATIC_KEY_FALSE(__relationship_switch); static inline bool task_relationship_used(void) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1274fe6d3ab8..8aba6670549c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3908,6 +3908,12 @@ union bpf_attr { * get relationship statistics of *tsk* and store in *stats*. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_set_curr_preferred_node(struct bpf_relationship_set_args *args, int len) + * Description + * set current task preferred node. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4082,6 +4088,7 @@ union bpf_attr { FN(cpus_share_cache), \ FN(nodemask_op), \ FN(get_task_relationship_stats),\ + FN(sched_set_curr_preferred_node),\ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 3cff265526b2..ac1b94ea6740 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -369,6 +369,24 @@ const struct bpf_func_proto bpf_get_task_relationship_stats_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, }; + +BPF_CALL_2(bpf_sched_set_curr_preferred_node, + struct bpf_relationship_set_args *, args, int, len) +{ + if (!args || len != sizeof(*args)) + return -EINVAL; + + sched_set_curr_preferred_node(args); + return 0; +} + +const struct bpf_func_proto bpf_sched_set_curr_preferred_node_proto = { + .func = bpf_sched_set_curr_preferred_node, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; #endif
static const struct bpf_func_proto * @@ -398,6 +416,8 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_SCHED_TASK_RELATIONSHIP case BPF_FUNC_get_task_relationship_stats: return &bpf_get_task_relationship_stats_proto; + case BPF_FUNC_sched_set_curr_preferred_node: + return &bpf_sched_set_curr_preferred_node_proto; #endif default: return bpf_base_func_proto(func_id); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9ee9437ed982..c64055874a73 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3116,6 +3116,16 @@ void sched_get_mm_relationship(struct task_struct *tsk, } #endif } + +void sched_set_curr_preferred_node(struct bpf_relationship_set_args *args) +{ +#ifdef CONFIG_NUMA_BALANCING + struct numa_group *grp = rcu_dereference_raw(current->numa_group); + + grp->preferred_nid = args->preferred_node; + schedstat_inc(grp->nodes_switch_cnt); +#endif +} #endif
#endif diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index db43107ba6f0..4f6fac621f65 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -447,6 +447,8 @@ class PrinterHelpers(Printer): 'struct sched_migrate_node', 'struct nodemask_op_args', 'struct bpf_relationship_get_args', + 'struct bpf_relationship_set_args', + 'struct sched_preferred_node_ctx', ] known_types = { '...', @@ -502,6 +504,8 @@ class PrinterHelpers(Printer): 'struct sched_migrate_node', 'struct nodemask_op_args', 'struct bpf_relationship_get_args', + 'struct bpf_relationship_set_args', + 'struct sched_preferred_node_ctx', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ac08b57826a2..5c04747f201c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3908,6 +3908,12 @@ union bpf_attr { * get relationship statistics of *tsk* and store in *stats*. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_set_curr_preferred_node(struct bpf_relationship_set_args *args, int len) + * Description + * set current task preferred node. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4082,6 +4088,7 @@ union bpf_attr { FN(cpus_share_cache), \ FN(nodemask_op), \ FN(get_task_relationship_stats),\ + FN(sched_set_curr_preferred_node),\ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/libbpf_sched.h b/tools/lib/bpf/libbpf_sched.h index 7fa5f03e6ba4..04af73b92856 100644 --- a/tools/lib/bpf/libbpf_sched.h +++ b/tools/lib/bpf/libbpf_sched.h @@ -17,6 +17,7 @@
#include <linux/bpf_topology.h> #include <linux/numa.h> +#include <linux/sched/relationship.h> #include <linux/version.h> #include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> @@ -27,7 +28,7 @@ #define INVALID_PTR ((void *)(0UL)) #define getVal(P) \ ({ \ - typeof(P) val = 0; \ + typeof(P) val; \ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ val; \ }) @@ -79,6 +80,13 @@ struct { __uint(max_entries, 1); } map_cpumask_info SEC(".maps");
+static struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct bpf_relationship_get_args); + __uint(max_entries, 1); +} map_rship_stats SEC(".maps"); + static __always_inline void libbpf_nodes_and(nodemask_t *dst, nodemask_t *src1, nodemask_t *src2) { @@ -614,4 +622,23 @@ static __always_inline int libbpf_sched_se_tag_of(struct sched_entity *se)
return se_tag; } + +static __always_inline int +libbpf_mem_preferred_nid(struct task_struct *tsk, nodemask_t *preferred_node) +{ + struct bpf_relationship_get_args *stats; + int key = 0; + int ret; + + stats = bpf_map_lookup_elem(&map_rship_stats, &key); + if (!stats) + return NUMA_NO_NODE; + + ret = bpf_get_task_relationship_stats(tsk, &map_rship_stats, stats); + if (ret) + return NUMA_NO_NODE; + + *preferred_node = getVal(stats->mm.comm.preferred_node); + return 0; +} #endif